From 68c747abd95561da1c48b4d492d599df89757821 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Sun, 15 Feb 2026 17:47:49 +0900 Subject: [PATCH v43 7/8] Row pattern recognition patch (tests). --- src/test/regress/expected/rpr.out | 4006 +++++++++++++++ src/test/regress/expected/rpr_base.out | 5538 +++++++++++++++++++++ src/test/regress/expected/rpr_explain.out | 3863 ++++++++++++++ src/test/regress/expected/rpr_nfa.out | 2524 ++++++++++ src/test/regress/parallel_schedule | 5 + src/test/regress/sql/rpr.sql | 2180 ++++++++ src/test/regress/sql/rpr_base.sql | 3658 ++++++++++++++ src/test/regress/sql/rpr_explain.sql | 2254 +++++++++ src/test/regress/sql/rpr_nfa.sql | 1865 +++++++ 9 files changed, 25893 insertions(+) create mode 100644 src/test/regress/expected/rpr.out create mode 100644 src/test/regress/expected/rpr_base.out create mode 100644 src/test/regress/expected/rpr_explain.out create mode 100644 src/test/regress/expected/rpr_nfa.out create mode 100644 src/test/regress/sql/rpr.sql create mode 100644 src/test/regress/sql/rpr_base.sql create mode 100644 src/test/regress/sql/rpr_explain.sql create mode 100644 src/test/regress/sql/rpr_nfa.sql diff --git a/src/test/regress/expected/rpr.out b/src/test/regress/expected/rpr.out new file mode 100644 index 00000000000..c921badb006 --- /dev/null +++ b/src/test/regress/expected/rpr.out @@ -0,0 +1,4006 @@ +-- +-- Test for row pattern definition clause +-- +CREATE TEMP TABLE stock ( + company TEXT, + tdate DATE, + price INTEGER +); +INSERT INTO stock VALUES ('company1', '2023-07-01', 100); +INSERT INTO stock VALUES ('company1', '2023-07-02', 200); +INSERT INTO stock VALUES ('company1', '2023-07-03', 150); +INSERT INTO stock VALUES ('company1', '2023-07-04', 140); +INSERT INTO stock VALUES ('company1', '2023-07-05', 150); +INSERT INTO stock VALUES ('company1', '2023-07-06', 90); +INSERT INTO stock VALUES ('company1', '2023-07-07', 110); +INSERT INTO stock VALUES ('company1', '2023-07-08', 130); +INSERT INTO stock VALUES 
('company1', '2023-07-09', 120); +INSERT INTO stock VALUES ('company1', '2023-07-10', 130); +INSERT INTO stock VALUES ('company2', '2023-07-01', 50); +INSERT INTO stock VALUES ('company2', '2023-07-02', 2000); +INSERT INTO stock VALUES ('company2', '2023-07-03', 1500); +INSERT INTO stock VALUES ('company2', '2023-07-04', 1400); +INSERT INTO stock VALUES ('company2', '2023-07-05', 1500); +INSERT INTO stock VALUES ('company2', '2023-07-06', 60); +INSERT INTO stock VALUES ('company2', '2023-07-07', 1100); +INSERT INTO stock VALUES ('company2', '2023-07-08', 1300); +INSERT INTO stock VALUES ('company2', '2023-07-09', 1200); +INSERT INTO stock VALUES ('company2', '2023-07-10', 1300); +SELECT * FROM stock; + company | tdate | price +----------+------------+------- + company1 | 07-01-2023 | 100 + company1 | 07-02-2023 | 200 + company1 | 07-03-2023 | 150 + company1 | 07-04-2023 | 140 + company1 | 07-05-2023 | 150 + company1 | 07-06-2023 | 90 + company1 | 07-07-2023 | 110 + company1 | 07-08-2023 | 130 + company1 | 07-09-2023 | 120 + company1 | 07-10-2023 | 130 + company2 | 07-01-2023 | 50 + company2 | 07-02-2023 | 2000 + company2 | 07-03-2023 | 1500 + company2 | 07-04-2023 | 1400 + company2 | 07-05-2023 | 1500 + company2 | 07-06-2023 | 60 + company2 | 07-07-2023 | 1100 + company2 | 07-08-2023 | 1300 + company2 | 07-09-2023 | 1200 + company2 | 07-10-2023 | 1300 +(20 rows) + +-- basic test using PREV +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | nth_second +----------+------------+-------+-------------+------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 | 07-02-2023 + company1 | 07-02-2023 | 200 | | | 
+ company1 | 07-03-2023 | 150 | | | + company1 | 07-04-2023 | 140 | | | + company1 | 07-05-2023 | 150 | | | + company1 | 07-06-2023 | 90 | 90 | 120 | 07-07-2023 + company1 | 07-07-2023 | 110 | | | + company1 | 07-08-2023 | 130 | | | + company1 | 07-09-2023 | 120 | | | + company1 | 07-10-2023 | 130 | | | + company2 | 07-01-2023 | 50 | 50 | 1400 | 07-02-2023 + company2 | 07-02-2023 | 2000 | | | + company2 | 07-03-2023 | 1500 | | | + company2 | 07-04-2023 | 1400 | | | + company2 | 07-05-2023 | 1500 | | | + company2 | 07-06-2023 | 60 | 60 | 1200 | 07-07-2023 + company2 | 07-07-2023 | 1100 | | | + company2 | 07-08-2023 | 1300 | | | + company2 | 07-09-2023 | 1200 | | | + company2 | 07-10-2023 | 1300 | | | +(20 rows) + +-- basic test using PREV. UP appears twice +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+ UP+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | nth_second +----------+------------+-------+-------------+------------+------------ + company1 | 07-01-2023 | 100 | 100 | 150 | 07-02-2023 + company1 | 07-02-2023 | 200 | | | + company1 | 07-03-2023 | 150 | | | + company1 | 07-04-2023 | 140 | | | + company1 | 07-05-2023 | 150 | | | + company1 | 07-06-2023 | 90 | 90 | 130 | 07-07-2023 + company1 | 07-07-2023 | 110 | | | + company1 | 07-08-2023 | 130 | | | + company1 | 07-09-2023 | 120 | | | + company1 | 07-10-2023 | 130 | | | + company2 | 07-01-2023 | 50 | 50 | 1500 | 07-02-2023 + company2 | 07-02-2023 | 2000 | | | + company2 | 07-03-2023 | 1500 | | | + company2 | 07-04-2023 | 1400 | | | + company2 | 07-05-2023 | 1500 | | | + company2 | 07-06-2023 | 60 | 60 | 1300 | 07-07-2023 + company2 | 07-07-2023 | 1100 | | | + company2 | 07-08-2023 | 1300 | | | + 
company2 | 07-09-2023 | 1200 | | | + company2 | 07-10-2023 | 1300 | | | +(20 rows) + +-- basic test using PREV. Use '*' +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP* DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | nth_second +----------+------------+-------+-------------+------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 | 07-02-2023 + company1 | 07-02-2023 | 200 | | | + company1 | 07-03-2023 | 150 | | | + company1 | 07-04-2023 | 140 | | | + company1 | 07-05-2023 | 150 | 150 | 90 | 07-06-2023 + company1 | 07-06-2023 | 90 | | | + company1 | 07-07-2023 | 110 | 110 | 120 | 07-08-2023 + company1 | 07-08-2023 | 130 | | | + company1 | 07-09-2023 | 120 | | | + company1 | 07-10-2023 | 130 | | | + company2 | 07-01-2023 | 50 | 50 | 1400 | 07-02-2023 + company2 | 07-02-2023 | 2000 | | | + company2 | 07-03-2023 | 1500 | | | + company2 | 07-04-2023 | 1400 | | | + company2 | 07-05-2023 | 1500 | 1500 | 60 | 07-06-2023 + company2 | 07-06-2023 | 60 | | | + company2 | 07-07-2023 | 1100 | 1100 | 1200 | 07-08-2023 + company2 | 07-08-2023 | 1300 | | | + company2 | 07-09-2023 | 1200 | | | + company2 | 07-10-2023 | 1300 | | | +(20 rows) + +-- basic test using PREV. Use '?' +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP? 
DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | nth_second +----------+------------+-------+-------------+------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 | 07-02-2023 + company1 | 07-02-2023 | 200 | | | + company1 | 07-03-2023 | 150 | | | + company1 | 07-04-2023 | 140 | | | + company1 | 07-05-2023 | 150 | 150 | 90 | 07-06-2023 + company1 | 07-06-2023 | 90 | | | + company1 | 07-07-2023 | 110 | 110 | 120 | 07-08-2023 + company1 | 07-08-2023 | 130 | | | + company1 | 07-09-2023 | 120 | | | + company1 | 07-10-2023 | 130 | | | + company2 | 07-01-2023 | 50 | 50 | 1400 | 07-02-2023 + company2 | 07-02-2023 | 2000 | | | + company2 | 07-03-2023 | 1500 | | | + company2 | 07-04-2023 | 1400 | | | + company2 | 07-05-2023 | 1500 | 1500 | 60 | 07-06-2023 + company2 | 07-06-2023 | 60 | | | + company2 | 07-07-2023 | 1100 | 1100 | 1200 | 07-08-2023 + company2 | 07-08-2023 | 1300 | | | + company2 | 07-09-2023 | 1200 | | | + company2 | 07-10-2023 | 1300 | | | +(20 rows) + +-- test using alternation (|) with sequence +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START (UP | DOWN)) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 200 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | 150 | 140 + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | 150 | 90 + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 110 | 130 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | 120 | 130 + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 2000 + company2 | 
07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | 1500 | 1400 + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | 1500 | 60 + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 1100 | 1300 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | 1200 | 1300 + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- test using alternation (|) with group quantifier +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START (UP | DOWN)+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 130 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1300 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- test using nested alternation +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START ((UP DOWN) | FLAT)+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price), + FLAT AS price = PREV(price) +); + company | tdate | price | first_value | last_value 
+----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 150 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | 140 | 90 + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 110 | 120 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1500 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | 1400 | 60 + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 1100 | 1200 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- test using group with quantifier +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((UP DOWN)+) + DEFINE + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | | + company1 | 07-02-2023 | 200 | 200 | 150 + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | 150 | 90 + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | 130 | 120 + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | | + company2 | 07-02-2023 | 2000 | 2000 | 1500 + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | 1500 | 60 + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | 1300 | 1200 + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + 
+-- test using absolute threshold values (not relative PREV) +-- HIGH: price > 150, LOW: price < 100, MID: neutral range +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOW MID* HIGH) + DEFINE + LOW AS price < 100, + MID AS price >= 100 AND price <= 150, + HIGH AS price > 150 +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | | + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 2000 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | 60 | 1100 + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- test threshold-based pattern with alternation +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOW (MID | HIGH)+) + DEFINE + LOW AS price < 100, + MID AS price >= 100 AND price <= 150, + HIGH AS price > 150 +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | | + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | 90 | 130 + company1 | 07-07-2023 | 110 | | + 
company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1500 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | 60 | 1300 + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- basic test with none-greedy pattern +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A A) + DEFINE + A AS price >= 140 AND price <= 150 +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 0 + company1 | 07-02-2023 | 200 | 0 + company1 | 07-03-2023 | 150 | 3 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 0 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 0 + company2 | 07-02-2023 | 2000 | 0 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 0 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + +-- test using {n} quantifier (A A A should be optimized to A{3}) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{3}) + DEFINE + A AS price >= 140 AND price <= 150 +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 0 + company1 | 07-02-2023 | 200 | 0 + company1 | 07-03-2023 | 150 | 3 + company1 | 
07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 0 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 0 + company2 | 07-02-2023 | 2000 | 0 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 0 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + +-- test using {n,} quantifier (2 or more) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,}) + DEFINE + A AS price > 100 +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 0 + company1 | 07-02-2023 | 200 | 4 + company1 | 07-03-2023 | 150 | 0 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 4 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 0 + company2 | 07-02-2023 | 2000 | 4 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 4 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + +-- test using {n,m} quantifier (2 to 4) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,4}) + DEFINE + A AS price > 100 +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 0 + company1 | 07-02-2023 | 200 
| 4 + company1 | 07-03-2023 | 150 | 0 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 4 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 0 + company2 | 07-02-2023 | 2000 | 4 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 4 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + +-- last_value() should remain consistent +SELECT company, tdate, price, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | last_value +----------+------------+-------+------------ + company1 | 07-01-2023 | 100 | 140 + company1 | 07-02-2023 | 200 | + company1 | 07-03-2023 | 150 | + company1 | 07-04-2023 | 140 | + company1 | 07-05-2023 | 150 | + company1 | 07-06-2023 | 90 | 120 + company1 | 07-07-2023 | 110 | + company1 | 07-08-2023 | 130 | + company1 | 07-09-2023 | 120 | + company1 | 07-10-2023 | 130 | + company2 | 07-01-2023 | 50 | 1400 + company2 | 07-02-2023 | 2000 | + company2 | 07-03-2023 | 1500 | + company2 | 07-04-2023 | 1400 | + company2 | 07-05-2023 | 1500 | + company2 | 07-06-2023 | 60 | 1200 + company2 | 07-07-2023 | 1100 | + company2 | 07-08-2023 | 1300 | + company2 | 07-09-2023 | 1200 | + company2 | 07-10-2023 | 1300 | +(20 rows) + +-- omit "START" in DEFINE but it is ok because "START AS TRUE" is +-- implicitly defined. per spec. 
+SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | nth_second +----------+------------+-------+-------------+------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 | 07-02-2023 + company1 | 07-02-2023 | 200 | | | + company1 | 07-03-2023 | 150 | | | + company1 | 07-04-2023 | 140 | | | + company1 | 07-05-2023 | 150 | | | + company1 | 07-06-2023 | 90 | 90 | 120 | 07-07-2023 + company1 | 07-07-2023 | 110 | | | + company1 | 07-08-2023 | 130 | | | + company1 | 07-09-2023 | 120 | | | + company1 | 07-10-2023 | 130 | | | + company2 | 07-01-2023 | 50 | 50 | 1400 | 07-02-2023 + company2 | 07-02-2023 | 2000 | | | + company2 | 07-03-2023 | 1500 | | | + company2 | 07-04-2023 | 1400 | | | + company2 | 07-05-2023 | 1500 | | | + company2 | 07-06-2023 | 60 | 60 | 1200 | 07-07-2023 + company2 | 07-07-2023 | 1100 | | | + company2 | 07-08-2023 | 1300 | | | + company2 | 07-09-2023 | 1200 | | | + company2 | 07-10-2023 | 1300 | | | +(20 rows) + +-- the first row start with less than or equal to 100 +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOWPRICE UP+ DOWN+) + DEFINE + LOWPRICE AS price <= 100, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | 90 
| 120 + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1400 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | 60 | 1200 + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- second row raises 120% +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOWPRICE UP+ DOWN+) + DEFINE + LOWPRICE AS price <= 100, + UP AS price > PREV(price) * 1.2, + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1400 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- using NEXT +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UPDOWN) + DEFINE + START AS TRUE, + UPDOWN AS price > PREV(price) 
AND price > NEXT(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 200 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | 140 | 150 + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 110 | 130 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 2000 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | 1400 | 1500 + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 1100 | 1300 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- using AFTER MATCH SKIP TO NEXT ROW +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (START UPDOWN) + DEFINE + START AS TRUE, + UPDOWN AS price > PREV(price) AND price > NEXT(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 200 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | 140 | 150 + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 110 | 130 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 2000 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | 1400 | 1500 + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 
| 1100 | 1300 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- match everything +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+) + DEFINE + A AS TRUE +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 130 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1300 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- nth_value beyond reduced frame (no IGNORE NULLS) +-- Tests WinGetSlotInFrame/WinGetFuncArgInFrame out-of-frame with RPR +SELECT company, tdate, price, + nth_value(price, 5) OVER w AS nth_5 +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | nth_5 +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | + company1 | 07-02-2023 | 200 | + company1 | 07-03-2023 | 150 | + company1 | 07-04-2023 | 140 | + company1 | 07-05-2023 | 150 | + company1 | 07-06-2023 | 90 | + 
company1 | 07-07-2023 | 110 | + company1 | 07-08-2023 | 130 | + company1 | 07-09-2023 | 120 | + company1 | 07-10-2023 | 130 | + company2 | 07-01-2023 | 50 | + company2 | 07-02-2023 | 2000 | + company2 | 07-03-2023 | 1500 | + company2 | 07-04-2023 | 1400 | + company2 | 07-05-2023 | 1500 | + company2 | 07-06-2023 | 60 | + company2 | 07-07-2023 | 1100 | + company2 | 07-08-2023 | 1300 | + company2 | 07-09-2023 | 1200 | + company2 | 07-10-2023 | 1300 | +(20 rows) + +-- backtracking with reclassification of rows +-- using AFTER MATCH SKIP PAST LAST ROW +SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+ B+) + DEFINE + A AS price > 100, + B AS price > 100 +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | | + company1 | 07-02-2023 | 200 | 07-02-2023 | 07-05-2023 + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 07-07-2023 | 07-10-2023 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | | + company2 | 07-02-2023 | 2000 | 07-02-2023 | 07-05-2023 + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 07-07-2023 | 07-10-2023 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- backtracking with reclassification of rows +-- using AFTER MATCH SKIP TO NEXT ROW +SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY 
tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (A+ B+) + DEFINE + A AS price > 100, + B AS price > 100 +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | | + company1 | 07-02-2023 | 200 | 07-02-2023 | 07-05-2023 + company1 | 07-03-2023 | 150 | 07-03-2023 | 07-05-2023 + company1 | 07-04-2023 | 140 | 07-04-2023 | 07-05-2023 + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 07-07-2023 | 07-10-2023 + company1 | 07-08-2023 | 130 | 07-08-2023 | 07-10-2023 + company1 | 07-09-2023 | 120 | 07-09-2023 | 07-10-2023 + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | | + company2 | 07-02-2023 | 2000 | 07-02-2023 | 07-05-2023 + company2 | 07-03-2023 | 1500 | 07-03-2023 | 07-05-2023 + company2 | 07-04-2023 | 1400 | 07-04-2023 | 07-05-2023 + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 07-07-2023 | 07-10-2023 + company2 | 07-08-2023 | 1300 | 07-08-2023 | 07-10-2023 + company2 | 07-09-2023 | 1200 | 07-09-2023 | 07-10-2023 + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- SKIP TO NEXT ROW with limited frame (Ishii-san's test case) +-- Each row should produce its own match within its frame +WITH data AS ( + SELECT * FROM (VALUES + ('A', 1), ('A', 2), + ('B', 3), ('B', 4) + ) AS t(gid, id) +) +SELECT gid, id, array_agg(id) OVER w +FROM data +WINDOW w AS ( + PARTITION BY gid + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE A AS id < 10 +); + gid | id | array_agg +-----+----+----------- + A | 1 | {1,2} + A | 2 | {2} + B | 3 | {3,4} + B | 4 | {4} +(4 rows) + +-- Limited frame with absorption test +-- Row 0: frame [0,2], can't see B at row 3 -> no match +-- Row 1: frame [1,3], can see A A B -> should match rows 1-3 +WITH frame_absorb_test AS ( + SELECT * FROM (VALUES 
+ (0, 'A'), (1, 'A'), (2, 'A'), (3, 'B') + ) AS t(id, flag) +) +SELECT id, flag, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM frame_absorb_test +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS flag = 'A', + B AS flag = 'B' +); + id | flag | match_start | match_end +----+------+-------------+----------- + 0 | A | | + 1 | A | 1 | 3 + 2 | A | | + 3 | B | | +(4 rows) + +-- ROWS BETWEEN CURRENT ROW AND offset FOLLOWING +SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w, + count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | count +----------+------------+-------+-------------+------------+------- + company1 | 07-01-2023 | 100 | 07-01-2023 | 07-03-2023 | 3 + company1 | 07-02-2023 | 200 | | | 0 + company1 | 07-03-2023 | 150 | | | 0 + company1 | 07-04-2023 | 140 | 07-04-2023 | 07-06-2023 | 3 + company1 | 07-05-2023 | 150 | | | 0 + company1 | 07-06-2023 | 90 | | | 0 + company1 | 07-07-2023 | 110 | 07-07-2023 | 07-09-2023 | 3 + company1 | 07-08-2023 | 130 | | | 0 + company1 | 07-09-2023 | 120 | | | 0 + company1 | 07-10-2023 | 130 | | | 0 + company2 | 07-01-2023 | 50 | 07-01-2023 | 07-03-2023 | 3 + company2 | 07-02-2023 | 2000 | | | 0 + company2 | 07-03-2023 | 1500 | | | 0 + company2 | 07-04-2023 | 1400 | 07-04-2023 | 07-06-2023 | 3 + company2 | 07-05-2023 | 1500 | | | 0 + company2 | 07-06-2023 | 60 | | | 0 + company2 | 07-07-2023 | 1100 | 07-07-2023 | 07-09-2023 | 3 + company2 | 07-08-2023 | 1300 | | | 0 + company2 | 07-09-2023 | 1200 | | | 0 + company2 | 07-10-2023 | 1300 | | | 0 +(20 rows) + +-- +-- Aggregates +-- +-- using AFTER MATCH SKIP PAST 
LAST ROW +SELECT company, tdate, price, + first_value(price) OVER w, + last_value(price) OVER w, + max(price) OVER w, + min(price) OVER w, + sum(price) OVER w, + avg(price) OVER w, + count(price) OVER w +FROM stock +WINDOW w AS ( +PARTITION BY company +ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING +AFTER MATCH SKIP PAST LAST ROW +INITIAL +PATTERN (START UP+ DOWN+) +DEFINE +START AS TRUE, +UP AS price > PREV(price), +DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | max | min | sum | avg | count +----------+------------+-------+-------------+------------+------+-----+------+-----------------------+------- + company1 | 07-01-2023 | 100 | 100 | 140 | 200 | 100 | 590 | 147.5000000000000000 | 4 + company1 | 07-02-2023 | 200 | | | | | | | 0 + company1 | 07-03-2023 | 150 | | | | | | | 0 + company1 | 07-04-2023 | 140 | | | | | | | 0 + company1 | 07-05-2023 | 150 | | | | | | | 0 + company1 | 07-06-2023 | 90 | 90 | 120 | 130 | 90 | 450 | 112.5000000000000000 | 4 + company1 | 07-07-2023 | 110 | | | | | | | 0 + company1 | 07-08-2023 | 130 | | | | | | | 0 + company1 | 07-09-2023 | 120 | | | | | | | 0 + company1 | 07-10-2023 | 130 | | | | | | | 0 + company2 | 07-01-2023 | 50 | 50 | 1400 | 2000 | 50 | 4950 | 1237.5000000000000000 | 4 + company2 | 07-02-2023 | 2000 | | | | | | | 0 + company2 | 07-03-2023 | 1500 | | | | | | | 0 + company2 | 07-04-2023 | 1400 | | | | | | | 0 + company2 | 07-05-2023 | 1500 | | | | | | | 0 + company2 | 07-06-2023 | 60 | 60 | 1200 | 1300 | 60 | 3660 | 915.0000000000000000 | 4 + company2 | 07-07-2023 | 1100 | | | | | | | 0 + company2 | 07-08-2023 | 1300 | | | | | | | 0 + company2 | 07-09-2023 | 1200 | | | | | | | 0 + company2 | 07-10-2023 | 1300 | | | | | | | 0 +(20 rows) + +-- using AFTER MATCH SKIP TO NEXT ROW +SELECT company, tdate, price, + first_value(price) OVER w, + last_value(price) OVER w, + max(price) OVER w, + min(price) OVER w, + sum(price) OVER w, + avg(price) OVER w, + count(price) OVER w +FROM stock 
+WINDOW w AS ( +PARTITION BY company +ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING +AFTER MATCH SKIP TO NEXT ROW +INITIAL +PATTERN (START UP+ DOWN+) +DEFINE +START AS TRUE, +UP AS price > PREV(price), +DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | max | min | sum | avg | count +----------+------------+-------+-------------+------------+------+------+------+-----------------------+------- + company1 | 07-01-2023 | 100 | 100 | 140 | 200 | 100 | 590 | 147.5000000000000000 | 4 + company1 | 07-02-2023 | 200 | | | | | | | 0 + company1 | 07-03-2023 | 150 | | | | | | | 0 + company1 | 07-04-2023 | 140 | 140 | 90 | 150 | 90 | 380 | 126.6666666666666667 | 3 + company1 | 07-05-2023 | 150 | | | | | | | 0 + company1 | 07-06-2023 | 90 | 90 | 120 | 130 | 90 | 450 | 112.5000000000000000 | 4 + company1 | 07-07-2023 | 110 | 110 | 120 | 130 | 110 | 360 | 120.0000000000000000 | 3 + company1 | 07-08-2023 | 130 | | | | | | | 0 + company1 | 07-09-2023 | 120 | | | | | | | 0 + company1 | 07-10-2023 | 130 | | | | | | | 0 + company2 | 07-01-2023 | 50 | 50 | 1400 | 2000 | 50 | 4950 | 1237.5000000000000000 | 4 + company2 | 07-02-2023 | 2000 | | | | | | | 0 + company2 | 07-03-2023 | 1500 | | | | | | | 0 + company2 | 07-04-2023 | 1400 | 1400 | 60 | 1500 | 60 | 2960 | 986.6666666666666667 | 3 + company2 | 07-05-2023 | 1500 | | | | | | | 0 + company2 | 07-06-2023 | 60 | 60 | 1200 | 1300 | 60 | 3660 | 915.0000000000000000 | 4 + company2 | 07-07-2023 | 1100 | 1100 | 1200 | 1300 | 1100 | 3600 | 1200.0000000000000000 | 3 + company2 | 07-08-2023 | 1300 | | | | | | | 0 + company2 | 07-09-2023 | 1200 | | | | | | | 0 + company2 | 07-10-2023 | 1300 | | | | | | | 0 +(20 rows) + +-- JOIN case +CREATE TEMP TABLE t1 (i int, v1 int); +CREATE TEMP TABLE t2 (j int, v2 int); +INSERT INTO t1 VALUES(1,10); +INSERT INTO t1 VALUES(1,11); +INSERT INTO t1 VALUES(1,12); +INSERT INTO t2 VALUES(2,10); +INSERT INTO t2 VALUES(2,11); +INSERT INTO t2 VALUES(2,12); +SELECT * FROM t1, 
t2 WHERE t1.v1 <= 11 AND t2.v2 <= 11; + i | v1 | j | v2 +---+----+---+---- + 1 | 10 | 2 | 10 + 1 | 10 | 2 | 11 + 1 | 11 | 2 | 10 + 1 | 11 | 2 | 11 +(4 rows) + +SELECT *, count(*) OVER w FROM t1, t2 +WINDOW w AS ( + PARTITION BY t1.i + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A) + DEFINE + A AS v1 <= 11 AND v2 <= 11 +); + i | v1 | j | v2 | count +---+----+---+----+------- + 1 | 10 | 2 | 10 | 1 + 1 | 10 | 2 | 11 | 1 + 1 | 10 | 2 | 12 | 0 + 1 | 11 | 2 | 10 | 1 + 1 | 11 | 2 | 11 | 1 + 1 | 11 | 2 | 12 | 0 + 1 | 12 | 2 | 10 | 0 + 1 | 12 | 2 | 11 | 0 + 1 | 12 | 2 | 12 | 0 +(9 rows) + +-- WITH case +WITH wstock AS ( + SELECT * FROM stock WHERE tdate < '2023-07-08' +) +SELECT tdate, price, +first_value(tdate) OVER w, +count(*) OVER w + FROM wstock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + tdate | price | first_value | count +------------+-------+-------------+------- + 07-01-2023 | 100 | 07-01-2023 | 4 + 07-02-2023 | 200 | | 0 + 07-03-2023 | 150 | | 0 + 07-04-2023 | 140 | | 0 + 07-05-2023 | 150 | | 0 + 07-06-2023 | 90 | | 0 + 07-07-2023 | 110 | | 0 + 07-01-2023 | 50 | 07-01-2023 | 4 + 07-02-2023 | 2000 | | 0 + 07-03-2023 | 1500 | | 0 + 07-04-2023 | 1400 | | 0 + 07-05-2023 | 1500 | | 0 + 07-06-2023 | 60 | | 0 + 07-07-2023 | 1100 | | 0 +(14 rows) + +-- ReScan test: LATERAL join forces WindowAgg rescan with RPR +-- Tests ExecReScanWindowAgg clearing prev_slot/next_slot +SELECT g.x, sub.* +FROM generate_series(1, 2) g(x), +LATERAL ( + SELECT id, price, count(*) OVER w AS c + FROM (VALUES (1, 100), (2, 200), (3, 150)) AS t(id, price) + WHERE id <= g.x + 1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (START UP+) + DEFINE + START AS TRUE, + UP AS price > PREV(price) + ) +) sub +ORDER BY g.x, sub.id; + x | id | price 
| c +---+----+-------+--- + 1 | 1 | 100 | 2 + 1 | 2 | 200 | 0 + 2 | 1 | 100 | 2 + 2 | 2 | 200 | 0 + 2 | 3 | 150 | 0 +(5 rows) + +-- PREV has multiple column reference +CREATE TEMP TABLE rpr1 (id INTEGER, i SERIAL, j INTEGER); +INSERT INTO rpr1(id, j) SELECT 1, g*2 FROM generate_series(1, 10) AS g; +SELECT id, i, j, count(*) OVER w + FROM rpr1 + WINDOW w AS ( + PARTITION BY id + ORDER BY i + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (START COND+) + DEFINE + START AS TRUE, + COND AS PREV(i + j + 1) < 10 +); + id | i | j | count +----+----+----+------- + 1 | 1 | 2 | 3 + 1 | 2 | 4 | 0 + 1 | 3 | 6 | 0 + 1 | 4 | 8 | 0 + 1 | 5 | 10 | 0 + 1 | 6 | 12 | 0 + 1 | 7 | 14 | 0 + 1 | 8 | 16 | 0 + 1 | 9 | 18 | 0 + 1 | 10 | 20 | 0 +(10 rows) + +-- Smoke test for larger partitions. +WITH s AS ( + SELECT v, count(*) OVER w AS c + FROM (SELECT generate_series(1, 5000) v) + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ( r+ ) + DEFINE r AS TRUE + ) +) +-- Should be exactly one long match across all rows. +SELECT * FROM s WHERE c > 0; + v | c +---+------ + 1 | 5000 +(1 row) + +WITH s AS ( + SELECT v, count(*) OVER w AS c + FROM (SELECT generate_series(1, 5000) v) + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ( r ) + DEFINE r AS TRUE + ) +) +-- Every row should be its own match. 
+SELECT count(*) FROM s WHERE c > 0; + count +------- + 5000 +(1 row) + +-- Large partition test: 100K rows with A+ B* C{10000,} pattern +-- Tests that int32 count doesn't overflow with large repetitions +WITH data AS ( + SELECT generate_series(0, 100000) AS v +), +result AS ( + SELECT v, + count(*) OVER w AS match_len, + first_value(v) OVER w AS match_first, + last_value(v) OVER w AS match_last + FROM data + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+ B* C{10000,}) + DEFINE + A AS v < 33333, + B AS v >= 33333 AND v < 66666, + C AS v >= 66666 AND v < 99999 + ) +) +-- Should match: A (33333 rows) + B (33333 rows) + C (33333 rows) = 99999 rows +SELECT match_first, match_last, match_len FROM result WHERE match_len > 0; + match_first | match_last | match_len +-------------+------------+----------- + 0 | 99998 | 99999 +(1 row) + +-- +-- Using IGNORE NULLS +-- +-- no NULL rows case. The result should be identical with "basic test using PREV" +SELECT company, tdate, price, first_value(price) IGNORE NULLS OVER w, + last_value(price) IGNORE NULLS OVER w, + nth_value(tdate, 2) IGNORE NULLS OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | nth_second +----------+------------+-------+-------------+------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 | 07-02-2023 + company1 | 07-02-2023 | 200 | | | + company1 | 07-03-2023 | 150 | | | + company1 | 07-04-2023 | 140 | | | + company1 | 07-05-2023 | 150 | | | + company1 | 07-06-2023 | 90 | 90 | 120 | 07-07-2023 + company1 | 07-07-2023 | 110 | | | + company1 | 07-08-2023 | 130 | | | + company1 | 07-09-2023 | 120 | | | + company1 | 07-10-2023 | 130 | | | + company2 | 07-01-2023 | 50 | 50 | 
1400 | 07-02-2023 + company2 | 07-02-2023 | 2000 | | | + company2 | 07-03-2023 | 1500 | | | + company2 | 07-04-2023 | 1400 | | | + company2 | 07-05-2023 | 1500 | | | + company2 | 07-06-2023 | 60 | 60 | 1200 | 07-07-2023 + company2 | 07-07-2023 | 1100 | | | + company2 | 07-08-2023 | 1300 | | | + company2 | 07-09-2023 | 1200 | | | + company2 | 07-10-2023 | 1300 | | | +(20 rows) + +-- nth_value with IGNORE NULLS option wants to find the second row but +-- due a NULL in the midlle, it returns the third row. +WITH data AS ( + SELECT * FROM (VALUES + (10, 1), (11, NULL), (12, 3), (13, 4) + ) AS t(gid, id)) + SELECT gid, id, nth_value(id, 2) IGNORE NULLS OVER w AS second_val, + array_agg(id) OVER w + FROM data + WINDOW w AS ( + ORDER BY gid + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS gid < 13 + ); + gid | id | second_val | array_agg +-----+----+------------+------------ + 10 | 1 | 3 | {1,NULL,3} + 11 | | | + 12 | 3 | | + 13 | 4 | | +(4 rows) + +-- nth_value with IGNORE NULLS option wants to find the third row but +-- due a NULL in the midlle, it reaches the end of reduced frame and +-- return NULL +WITH data AS ( + SELECT * FROM (VALUES + (10, 1), (11, NULL), (12, 3), (13, 4) + ) AS t(gid, id)) + SELECT gid, id, nth_value(id, 3) IGNORE NULLS OVER w AS thrid_val, + array_agg(id) OVER w + FROM data + WINDOW w AS ( + ORDER BY gid + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS gid < 13 + ); + gid | id | thrid_val | array_agg +-----+----+-----------+------------ + 10 | 1 | | {1,NULL,3} + 11 | | | + 12 | 3 | | + 13 | 4 | | +(4 rows) + +-- nth_value beyond reduced frame with IGNORE NULLS +-- Tests ignorenulls_getfuncarginframe early out-of-frame check +SELECT company, tdate, price, + nth_value(price, 5) IGNORE NULLS OVER w AS nth_5_in +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED 
FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | nth_5_in +----------+------------+-------+---------- + company1 | 07-01-2023 | 100 | + company1 | 07-02-2023 | 200 | + company1 | 07-03-2023 | 150 | + company1 | 07-04-2023 | 140 | + company1 | 07-05-2023 | 150 | + company1 | 07-06-2023 | 90 | + company1 | 07-07-2023 | 110 | + company1 | 07-08-2023 | 130 | + company1 | 07-09-2023 | 120 | + company1 | 07-10-2023 | 130 | + company2 | 07-01-2023 | 50 | + company2 | 07-02-2023 | 2000 | + company2 | 07-03-2023 | 1500 | + company2 | 07-04-2023 | 1400 | + company2 | 07-05-2023 | 1500 | + company2 | 07-06-2023 | 60 | + company2 | 07-07-2023 | 1100 | + company2 | 07-08-2023 | 1300 | + company2 | 07-09-2023 | 1200 | + company2 | 07-10-2023 | 1300 | +(20 rows) + +-- View and pg_get_viewdef tests. +CREATE TEMP VIEW v_window AS +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); +SELECT * FROM v_window; + company | tdate | price | first_value | last_value | nth_second +----------+------------+-------+-------------+------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 | 07-02-2023 + company1 | 07-02-2023 | 200 | | | + company1 | 07-03-2023 | 150 | | | + company1 | 07-04-2023 | 140 | | | + company1 | 07-05-2023 | 150 | | | + company1 | 07-06-2023 | 90 | 90 | 120 | 07-07-2023 + company1 | 07-07-2023 | 110 | | | + company1 | 07-08-2023 | 130 | | | + company1 | 07-09-2023 | 120 | | | + company1 | 07-10-2023 | 130 | | | + company2 | 07-01-2023 | 50 | 50 | 1400 | 07-02-2023 + company2 | 07-02-2023 | 2000 | | | + company2 | 07-03-2023 | 
1500 | | | + company2 | 07-04-2023 | 1400 | | | + company2 | 07-05-2023 | 1500 | | | + company2 | 07-06-2023 | 60 | 60 | 1200 | 07-07-2023 + company2 | 07-07-2023 | 1100 | | | + company2 | 07-08-2023 | 1300 | | | + company2 | 07-09-2023 | 1200 | | | + company2 | 07-10-2023 | 1300 | | | +(20 rows) + +SELECT pg_get_viewdef('v_window'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + first_value(price) OVER w AS first_value, + + last_value(price) OVER w AS last_value, + + nth_value(tdate, 2) OVER w AS nth_second + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (start up+ down+) + + DEFINE + + start AS true, + + up AS (price > prev(price)), + + down AS (price < prev(price)) ); +(1 row) + +-- +-- Pattern optimization tests +-- VIEW shows original pattern, EXPLAIN shows optimized pattern +-- +-- Test: duplicate alternatives removal (A | B | A)+ -> (A | B)+ +CREATE TEMP VIEW v_opt_dup AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | B | A)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_dup'); -- original: ((a | b | a)+) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a | b | a)+) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_dup; -- optimized: ((a | b)+) + QUERY PLAN 
+---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_dup + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b)+ + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: duplicate group removal ((A | B)+ | (A | B)+) -> (A | B)+ +CREATE TEMP VIEW v_opt_dup_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | B)+ | (A | B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_dup_group'); -- original: ((a | b)+ | (a | b)+) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a | b)+ | (a | b)+) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_dup_group; -- optimized: ((a | b)+) + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_dup_group + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b)+ + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: consecutive vars merge (A A A) -> A{3} +CREATE TEMP VIEW v_opt_merge AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A A) + DEFINE + A AS price >= 140 AND price <= 150 +); +SELECT pg_get_viewdef('v_opt_merge'); -- original: (a a a) + 
pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a a a) + + DEFINE + + a AS ((price >= 140) AND (price <= 150)) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge; -- optimized: a{3} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{3} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: quantified vars merge (A A+ A) -> A{3,} +CREATE TEMP VIEW v_opt_merge_quant AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A+ A) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_merge_quant'); -- original: (a a+ a) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a a+ a) + + DEFINE + + a AS (price > 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_quant; -- optimized: a{3,} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_quant + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{3,}" + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 
rows) + +-- Test: merge two unbounded (A+ A+) -> A{2,} +CREATE TEMP VIEW v_opt_merge_unbounded AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A+ A+) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_merge_unbounded'); -- original: (a+ a+) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a+ a+) + + DEFINE + + a AS (price > 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_unbounded; -- optimized: a{2,} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_unbounded + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2,}" + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: merge with zero-min (A* A+) -> A+ +CREATE TEMP VIEW v_opt_merge_star AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A* A+) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_merge_star'); -- original: (a* a+) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a* a+) + + DEFINE + + a AS (price > 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM 
v_opt_merge_star; -- optimized: a+ + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_star + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: complex merge (A A{2} A+ A{3}) -> A{7,} +CREATE TEMP VIEW v_opt_merge_complex AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A{2} A+ A{3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_merge_complex'); -- original: (a a{2} a+ a{3}) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a a{2} a+ a{3}) + + DEFINE + + a AS (price > 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_complex; -- optimized: a{7,} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_complex + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{7,}" + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: group merge ((A B) (A B)+) -> (A B){2,} +CREATE TEMP VIEW v_opt_merge_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A B) (A B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_group'); -- original: ((a b) (a b)+) + 
pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a b) (a b)+) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group; -- expected: (a b){2,} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_group + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){2,}" + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: group merge A B (A B)+ -> (A B){2,} +CREATE TEMP VIEW v_opt_merge_group2 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A B (A B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_group2'); -- original: (a b (a b)+) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a b (a b)+) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group2; -- expected: (a b){2,} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_group2 + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED 
FOLLOWING) + Pattern: (a' b'){2,}" + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: group merge (A B) (A B)+ (A B) -> (A B){3,} +CREATE TEMP VIEW v_opt_merge_group3 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A B) (A B)+ (A B)) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_group3'); -- original: ((a b) (a b)+ (a b)) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a b) (a b)+ (a b)) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group3; -- expected: (a b){3,} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_group3 + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){3,}" + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: group merge A B A B (A B)+ A B A B -> (A B){5,} +CREATE TEMP VIEW v_opt_merge_group4 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A B A B (A B)+ A B A B) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_group4'); -- original: (a b a b (a b)+ a b a b) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + 
FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a b a b (a b)+ a b a b) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group4; -- expected: (a b){5,} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_group4 + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){5,}" + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: group merge C A B (A B)+ A B C -> C (A B){3,} C +CREATE TEMP VIEW v_opt_merge_group5 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (C A B (A B)+ A B C) + DEFINE + A AS price > 100, + B AS price <= 100, + C AS price > 200 +); +SELECT pg_get_viewdef('v_opt_merge_group5'); -- original: (c a b (a b)+ a b c) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (c a b (a b)+ a b c) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100), + + c AS (price > 200) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group5; -- expected: c (a b){3,} c + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_group5 + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: c (a b){3,} c + -> Sort + Sort Key: stock.company + -> Seq 
Scan on stock +(7 rows) + +-- Test: consecutive GROUP merge (A B)+ (A B)+ -> (A B){2,} +CREATE TEMP VIEW v_opt_merge_consec_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A B)+ (A B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_consec_group'); -- original: ((a b)+ (a b)+) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a b)+ (a b)+) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_consec_group; -- expected: (a b){2,} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_consec_group + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){2,}" + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: consecutive GROUP merge with different quantifiers (A B){2} (A B){3} -> (A B){5} +CREATE TEMP VIEW v_opt_merge_consec_group2 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A B){2} (A B){3}) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_consec_group2'); -- original: ((a b){2} (a b){3}) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS 
(PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a b){2} (a b){3}) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_consec_group2; -- expected: (a b){5} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_consec_group2 + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b){5} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test {n} quantifier display +CREATE TEMP VIEW v_quantifier_n AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_quantifier_n'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{3}) + + DEFINE + + a AS (price > 100) ); +(1 row) + +-- Test {n,} quantifier display +CREATE TEMP VIEW v_quantifier_n_plus AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_quantifier_n_plus'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH 
SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{2,}) + + DEFINE + + a AS (price > 100) ); +(1 row) + +-- Test: flatten nested SEQ (A (B C)) -> A B C +CREATE TEMP VIEW v_opt_flatten_seq AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A (B C)) + DEFINE + A AS price > 100, + B AS price > 150, + C AS price < 150 +); +SELECT pg_get_viewdef('v_opt_flatten_seq'); -- original: (a (b c)) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a (b c)) + + DEFINE + + a AS (price > 100), + + b AS (price > 150), + + c AS (price < 150) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_flatten_seq; -- optimized: a b c + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_flatten_seq + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: flatten nested ALT (A | (B | C)) -> (A | B | C) +CREATE TEMP VIEW v_opt_flatten_alt AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | (B | C))+) + DEFINE + A AS price > 200, + B AS price > 100, + C AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_flatten_alt'); -- original: ((a | (b | c))+) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + 
WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a | (b | c))+) + + DEFINE + + a AS (price > 200), + + b AS (price > 100), + + c AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_flatten_alt; -- optimized: ((a | b | c))+ + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_flatten_alt + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b | c)+ + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: unwrap GROUP{1,1} ((A)) -> A +CREATE TEMP VIEW v_opt_unwrap_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (((A))) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_unwrap_group'); -- original: (((a))) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (((a))) + + DEFINE + + a AS (price > 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_unwrap_group; -- optimized: a + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_unwrap_group + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: quantifier multiplication (A{2}){3} -> A{6} +CREATE TEMP VIEW v_opt_quant_mult AS +SELECT company, tdate, price, count(*) OVER w + 
FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A{2}){3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_quant_mult'); -- original: ((a{2}){3}) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a{2}){3}) + + DEFINE + + a AS (price > 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult; -- optimized: a{6} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_quant_mult + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{6} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: quantifier multiplication (A{2,4}){3} -> A{6,12} +CREATE TEMP VIEW v_opt_quant_mult_range AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A{2,4}){3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_quant_mult_range'); -- original: ((a{2,4}){3}) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a{2,4}){3}) + + DEFINE + + a AS (price > 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult_range; -- optimized: a{6,12} + QUERY PLAN 
+---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_quant_mult_range + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{6,12} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: quantifier multiplication blocked (A{2}){3,5} -> no change +-- outer range with child exact > 1 causes gaps (6,8,10 not 6,7,8,9,10) +CREATE TEMP VIEW v_opt_quant_mult_range2 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A{2}){3,5}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_quant_mult_range2'); -- original: ((a{2}){3,5}) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a{2}){3,5}) + + DEFINE + + a AS (price > 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult_range2; -- NOT optimized: (a{2}){3,5} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_quant_mult_range2 + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2}){3,5} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: quantifier multiplication blocked by INF (A+){3} -> no change +CREATE TEMP VIEW v_opt_quant_mult_inf AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A+){3}) + DEFINE + A AS price > 100 +); 
+SELECT pg_get_viewdef('v_opt_quant_mult_inf'); -- original: ((a+){3}) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a+){3}) + + DEFINE + + a AS (price > 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult_inf; -- no multiply: (a+){3} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_quant_mult_inf + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a+"){3} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: unwrap single-item ALT after duplicate removal (A | A) -> A +CREATE TEMP VIEW v_opt_unwrap_alt AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | A)+) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_unwrap_alt'); -- original: ((a | a)+) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a | a)+) + + DEFINE + + a AS (price > 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_unwrap_alt; -- optimized: a+ + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_unwrap_alt + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND 
UNBOUNDED FOLLOWING) + Pattern: a+" + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: GROUP{1,1} to SEQ with flatten ((A B)(C D)) -> A B C D +CREATE TEMP VIEW v_opt_group_to_seq AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (((A B)(C D))) + DEFINE + A AS price > 200, + B AS price > 150, + C AS price > 100, + D AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_group_to_seq'); -- original: (((a b)(c d))) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (((a b) (c d))) + + DEFINE + + a AS (price > 200), + + b AS (price > 150), + + c AS (price > 100), + + d AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_group_to_seq; -- optimized: a b c d + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_group_to_seq + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c d + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: combined consecutive GROUP + prefix merge A B (A B)+ (A B)+ -> (A B){3,} +CREATE TEMP VIEW v_opt_combined_merge AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A B (A B)+ (A B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_combined_merge'); -- original: (a b (a b)+ (a b)+) + pg_get_viewdef 
+--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a b (a b)+ (a b)+) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_combined_merge; -- expected: (a b){3,} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_combined_merge + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){3,}" + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: nested ALT pattern - bug reproduction +CREATE TEMP VIEW v_opt_nested_alt AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (((A B) | C) D | A B C) + DEFINE + A AS price <= 100, + B AS price <= 150, + C AS price <= 200, + D AS price > 200 +); +SELECT pg_get_viewdef('v_opt_nested_alt'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (((a b) | c) d | a b c) + + DEFINE + + a AS (price <= 100), + + b AS (price <= 150), + + c AS (price <= 200), + + d AS (price > 200) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_nested_alt; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_nested_alt + -> WindowAgg + Window: w AS (PARTITION BY 
stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a b | c) d | a b c) + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: nested ALT with unbounded - A+ inside +CREATE TEMP VIEW v_opt_nested_alt2 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (((A+ B) | C) D | A B C) + DEFINE + A AS price <= 100, + B AS price <= 150, + C AS price <= 200, + D AS price > 200 +); +SELECT pg_get_viewdef('v_opt_nested_alt2'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (((a+ b) | c) d | a b c) + + DEFINE + + a AS (price <= 100), + + b AS (price <= 150), + + c AS (price <= 200), + + d AS (price > 200) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_nested_alt2; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_nested_alt2 + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a+" b | c) d | a b c) + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- +-- Error cases +-- +-- row pattern definition variable name must not appear more than once +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price), + UP AS price > PREV(price) +); +ERROR: row pattern definition variable name "up" 
appears more than once in DEFINE clause +LINE 11: UP AS price > PREV(price), + ^ +-- subqueries in DEFINE clause are not supported +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START LOWPRICE) + DEFINE + START AS TRUE, + LOWPRICE AS price < (SELECT 100) +); +ERROR: cannot use subquery in DEFINE expression +LINE 11: LOWPRICE AS price < (SELECT 100) + ^ +-- aggregates in DEFINE clause are not supported +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START LOWPRICE) + DEFINE + START AS TRUE, + LOWPRICE AS price < count(*) +); +ERROR: aggregate functions are not allowed in DEFINE +LINE 11: LOWPRICE AS price < count(*) + ^ +-- FRAME must start at current row when row pattern recognition is used +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); +ERROR: FRAME must start at CURRENT ROW when row pattern recognition is used +LINE 6: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ^ +DETAIL: Current frame starts with UNBOUNDED PRECEDING. +HINT: Use: ROWS BETWEEN CURRENT ROW AND ... 
+-- EXCLUDE is not permitted +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + EXCLUDE CURRENT ROW + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); +ERROR: EXCLUDE options are not permitted when row pattern recognition is used +LINE 7: EXCLUDE CURRENT ROW + ^ +DETAIL: Frame definition includes EXCLUDE CURRENT ROW. +HINT: Remove the EXCLUDE clause from the window definition. +-- SEEK is not supported +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + SEEK + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); +ERROR: SEEK is not supported +LINE 8: SEEK + ^ +HINT: Use INITIAL instead. 
+-- PREV's argument must have at least 1 column reference +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(1), + DOWN AS price < PREV(1) +); +ERROR: row pattern navigation operation's argument must include at least one column reference +-- Unsupported quantifier +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (START UP~ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(1), + DOWN AS price < PREV(1) +); +ERROR: unsupported quantifier "~" +LINE 9: PATTERN (START UP~ DOWN+) + ^ +HINT: Valid quantifiers are: *, +, ?, *?, +?, ??, {n}, {n,}, {,m}, {n,m} and their reluctant versions. +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (START UP+? DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(1), + DOWN AS price < PREV(1) +); +ERROR: reluctant quantifiers are not yet supported +LINE 9: PATTERN (START UP+? 
DOWN+) + ^ +-- Maximum pattern variables is 251 (RPR_VARID_MAX) +-- Error: 252 variables exceeds limit of 251 +DO $$ +DECLARE + pattern_vars text; + define_vars text; + query text; +BEGIN + SELECT string_agg('v' || lpad(i::text, 3, '0'), ' '), + string_agg('v' || lpad(i::text, 3, '0') || ' AS TRUE', ', ') + INTO pattern_vars, define_vars + FROM generate_series(1, 252) i; + + query := format('SELECT * FROM (SELECT 1 AS x) t WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (%s) + DEFINE %s)', pattern_vars, define_vars); + + EXECUTE query; +END; +$$; +ERROR: too many pattern variables +DETAIL: Maximum is 251. +CONTEXT: SQL statement "SELECT * FROM (SELECT 1 AS x) t WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (v001 v002 v003 v004 v005 v006 v007 v008 v009 v010 v011 v012 v013 v014 v015 v016 v017 v018 v019 v020 v021 v022 v023 v024 v025 v026 v027 v028 v029 v030 v031 v032 v033 v034 v035 v036 v037 v038 v039 v040 v041 v042 v043 v044 v045 v046 v047 v048 v049 v050 v051 v052 v053 v054 v055 v056 v057 v058 v059 v060 v061 v062 v063 v064 v065 v066 v067 v068 v069 v070 v071 v072 v073 v074 v075 v076 v077 v078 v079 v080 v081 v082 v083 v084 v085 v086 v087 v088 v089 v090 v091 v092 v093 v094 v095 v096 v097 v098 v099 v100 v101 v102 v103 v104 v105 v106 v107 v108 v109 v110 v111 v112 v113 v114 v115 v116 v117 v118 v119 v120 v121 v122 v123 v124 v125 v126 v127 v128 v129 v130 v131 v132 v133 v134 v135 v136 v137 v138 v139 v140 v141 v142 v143 v144 v145 v146 v147 v148 v149 v150 v151 v152 v153 v154 v155 v156 v157 v158 v159 v160 v161 v162 v163 v164 v165 v166 v167 v168 v169 v170 v171 v172 v173 v174 v175 v176 v177 v178 v179 v180 v181 v182 v183 v184 v185 v186 v187 v188 v189 v190 v191 v192 v193 v194 v195 v196 v197 v198 v199 v200 v201 v202 v203 v204 v205 v206 v207 v208 v209 v210 v211 v212 v213 v214 v215 v216 v217 v218 v219 v220 v221 v222 v223 v224 v225 v226 v227 v228 v229 v230 v231 v232 v233 v234 v235 v236 v237 v238 v239 v240 v241 v242 v243 v244 v245 
v246 v247 v248 v249 v250 v251 v252) + DEFINE v001 AS TRUE, v002 AS TRUE, v003 AS TRUE, v004 AS TRUE, v005 AS TRUE, v006 AS TRUE, v007 AS TRUE, v008 AS TRUE, v009 AS TRUE, v010 AS TRUE, v011 AS TRUE, v012 AS TRUE, v013 AS TRUE, v014 AS TRUE, v015 AS TRUE, v016 AS TRUE, v017 AS TRUE, v018 AS TRUE, v019 AS TRUE, v020 AS TRUE, v021 AS TRUE, v022 AS TRUE, v023 AS TRUE, v024 AS TRUE, v025 AS TRUE, v026 AS TRUE, v027 AS TRUE, v028 AS TRUE, v029 AS TRUE, v030 AS TRUE, v031 AS TRUE, v032 AS TRUE, v033 AS TRUE, v034 AS TRUE, v035 AS TRUE, v036 AS TRUE, v037 AS TRUE, v038 AS TRUE, v039 AS TRUE, v040 AS TRUE, v041 AS TRUE, v042 AS TRUE, v043 AS TRUE, v044 AS TRUE, v045 AS TRUE, v046 AS TRUE, v047 AS TRUE, v048 AS TRUE, v049 AS TRUE, v050 AS TRUE, v051 AS TRUE, v052 AS TRUE, v053 AS TRUE, v054 AS TRUE, v055 AS TRUE, v056 AS TRUE, v057 AS TRUE, v058 AS TRUE, v059 AS TRUE, v060 AS TRUE, v061 AS TRUE, v062 AS TRUE, v063 AS TRUE, v064 AS TRUE, v065 AS TRUE, v066 AS TRUE, v067 AS TRUE, v068 AS TRUE, v069 AS TRUE, v070 AS TRUE, v071 AS TRUE, v072 AS TRUE, v073 AS TRUE, v074 AS TRUE, v075 AS TRUE, v076 AS TRUE, v077 AS TRUE, v078 AS TRUE, v079 AS TRUE, v080 AS TRUE, v081 AS TRUE, v082 AS TRUE, v083 AS TRUE, v084 AS TRUE, v085 AS TRUE, v086 AS TRUE, v087 AS TRUE, v088 AS TRUE, v089 AS TRUE, v090 AS TRUE, v091 AS TRUE, v092 AS TRUE, v093 AS TRUE, v094 AS TRUE, v095 AS TRUE, v096 AS TRUE, v097 AS TRUE, v098 AS TRUE, v099 AS TRUE, v100 AS TRUE, v101 AS TRUE, v102 AS TRUE, v103 AS TRUE, v104 AS TRUE, v105 AS TRUE, v106 AS TRUE, v107 AS TRUE, v108 AS TRUE, v109 AS TRUE, v110 AS TRUE, v111 AS TRUE, v112 AS TRUE, v113 AS TRUE, v114 AS TRUE, v115 AS TRUE, v116 AS TRUE, v117 AS TRUE, v118 AS TRUE, v119 AS TRUE, v120 AS TRUE, v121 AS TRUE, v122 AS TRUE, v123 AS TRUE, v124 AS TRUE, v125 AS TRUE, v126 AS TRUE, v127 AS TRUE, v128 AS TRUE, v129 AS TRUE, v130 AS TRUE, v131 AS TRUE, v132 AS TRUE, v133 AS TRUE, v134 AS TRUE, v135 AS TRUE, v136 AS TRUE, v137 AS TRUE, v138 AS TRUE, v139 AS TRUE, v140 AS 
TRUE, v141 AS TRUE, v142 AS TRUE, v143 AS TRUE, v144 AS TRUE, v145 AS TRUE, v146 AS TRUE, v147 AS TRUE, v148 AS TRUE, v149 AS TRUE, v150 AS TRUE, v151 AS TRUE, v152 AS TRUE, v153 AS TRUE, v154 AS TRUE, v155 AS TRUE, v156 AS TRUE, v157 AS TRUE, v158 AS TRUE, v159 AS TRUE, v160 AS TRUE, v161 AS TRUE, v162 AS TRUE, v163 AS TRUE, v164 AS TRUE, v165 AS TRUE, v166 AS TRUE, v167 AS TRUE, v168 AS TRUE, v169 AS TRUE, v170 AS TRUE, v171 AS TRUE, v172 AS TRUE, v173 AS TRUE, v174 AS TRUE, v175 AS TRUE, v176 AS TRUE, v177 AS TRUE, v178 AS TRUE, v179 AS TRUE, v180 AS TRUE, v181 AS TRUE, v182 AS TRUE, v183 AS TRUE, v184 AS TRUE, v185 AS TRUE, v186 AS TRUE, v187 AS TRUE, v188 AS TRUE, v189 AS TRUE, v190 AS TRUE, v191 AS TRUE, v192 AS TRUE, v193 AS TRUE, v194 AS TRUE, v195 AS TRUE, v196 AS TRUE, v197 AS TRUE, v198 AS TRUE, v199 AS TRUE, v200 AS TRUE, v201 AS TRUE, v202 AS TRUE, v203 AS TRUE, v204 AS TRUE, v205 AS TRUE, v206 AS TRUE, v207 AS TRUE, v208 AS TRUE, v209 AS TRUE, v210 AS TRUE, v211 AS TRUE, v212 AS TRUE, v213 AS TRUE, v214 AS TRUE, v215 AS TRUE, v216 AS TRUE, v217 AS TRUE, v218 AS TRUE, v219 AS TRUE, v220 AS TRUE, v221 AS TRUE, v222 AS TRUE, v223 AS TRUE, v224 AS TRUE, v225 AS TRUE, v226 AS TRUE, v227 AS TRUE, v228 AS TRUE, v229 AS TRUE, v230 AS TRUE, v231 AS TRUE, v232 AS TRUE, v233 AS TRUE, v234 AS TRUE, v235 AS TRUE, v236 AS TRUE, v237 AS TRUE, v238 AS TRUE, v239 AS TRUE, v240 AS TRUE, v241 AS TRUE, v242 AS TRUE, v243 AS TRUE, v244 AS TRUE, v245 AS TRUE, v246 AS TRUE, v247 AS TRUE, v248 AS TRUE, v249 AS TRUE, v250 AS TRUE, v251 AS TRUE, v252 AS TRUE)" +PL/pgSQL function inline_code_block line 17 at EXECUTE +-- Error: 253 variables exceeds limit of 251 +DO $$ +DECLARE + pattern_vars text; + define_vars text; + query text; +BEGIN + SELECT string_agg('v' || lpad(i::text, 3, '0'), ' '), + string_agg('v' || lpad(i::text, 3, '0') || ' AS TRUE', ', ') + INTO pattern_vars, define_vars + FROM generate_series(1, 253) i; + + query := format('SELECT * FROM (SELECT 1 AS x) t 
WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (%s) + DEFINE %s)', pattern_vars, define_vars); + + EXECUTE query; +END; +$$; +ERROR: too many pattern variables +DETAIL: Maximum is 251. +CONTEXT: SQL statement "SELECT * FROM (SELECT 1 AS x) t WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (v001 v002 v003 v004 v005 v006 v007 v008 v009 v010 v011 v012 v013 v014 v015 v016 v017 v018 v019 v020 v021 v022 v023 v024 v025 v026 v027 v028 v029 v030 v031 v032 v033 v034 v035 v036 v037 v038 v039 v040 v041 v042 v043 v044 v045 v046 v047 v048 v049 v050 v051 v052 v053 v054 v055 v056 v057 v058 v059 v060 v061 v062 v063 v064 v065 v066 v067 v068 v069 v070 v071 v072 v073 v074 v075 v076 v077 v078 v079 v080 v081 v082 v083 v084 v085 v086 v087 v088 v089 v090 v091 v092 v093 v094 v095 v096 v097 v098 v099 v100 v101 v102 v103 v104 v105 v106 v107 v108 v109 v110 v111 v112 v113 v114 v115 v116 v117 v118 v119 v120 v121 v122 v123 v124 v125 v126 v127 v128 v129 v130 v131 v132 v133 v134 v135 v136 v137 v138 v139 v140 v141 v142 v143 v144 v145 v146 v147 v148 v149 v150 v151 v152 v153 v154 v155 v156 v157 v158 v159 v160 v161 v162 v163 v164 v165 v166 v167 v168 v169 v170 v171 v172 v173 v174 v175 v176 v177 v178 v179 v180 v181 v182 v183 v184 v185 v186 v187 v188 v189 v190 v191 v192 v193 v194 v195 v196 v197 v198 v199 v200 v201 v202 v203 v204 v205 v206 v207 v208 v209 v210 v211 v212 v213 v214 v215 v216 v217 v218 v219 v220 v221 v222 v223 v224 v225 v226 v227 v228 v229 v230 v231 v232 v233 v234 v235 v236 v237 v238 v239 v240 v241 v242 v243 v244 v245 v246 v247 v248 v249 v250 v251 v252 v253) + DEFINE v001 AS TRUE, v002 AS TRUE, v003 AS TRUE, v004 AS TRUE, v005 AS TRUE, v006 AS TRUE, v007 AS TRUE, v008 AS TRUE, v009 AS TRUE, v010 AS TRUE, v011 AS TRUE, v012 AS TRUE, v013 AS TRUE, v014 AS TRUE, v015 AS TRUE, v016 AS TRUE, v017 AS TRUE, v018 AS TRUE, v019 AS TRUE, v020 AS TRUE, v021 AS TRUE, v022 AS TRUE, v023 AS TRUE, v024 AS TRUE, v025 AS TRUE, v026 AS TRUE, v027 AS TRUE, 
v028 AS TRUE, v029 AS TRUE, v030 AS TRUE, v031 AS TRUE, v032 AS TRUE, v033 AS TRUE, v034 AS TRUE, v035 AS TRUE, v036 AS TRUE, v037 AS TRUE, v038 AS TRUE, v039 AS TRUE, v040 AS TRUE, v041 AS TRUE, v042 AS TRUE, v043 AS TRUE, v044 AS TRUE, v045 AS TRUE, v046 AS TRUE, v047 AS TRUE, v048 AS TRUE, v049 AS TRUE, v050 AS TRUE, v051 AS TRUE, v052 AS TRUE, v053 AS TRUE, v054 AS TRUE, v055 AS TRUE, v056 AS TRUE, v057 AS TRUE, v058 AS TRUE, v059 AS TRUE, v060 AS TRUE, v061 AS TRUE, v062 AS TRUE, v063 AS TRUE, v064 AS TRUE, v065 AS TRUE, v066 AS TRUE, v067 AS TRUE, v068 AS TRUE, v069 AS TRUE, v070 AS TRUE, v071 AS TRUE, v072 AS TRUE, v073 AS TRUE, v074 AS TRUE, v075 AS TRUE, v076 AS TRUE, v077 AS TRUE, v078 AS TRUE, v079 AS TRUE, v080 AS TRUE, v081 AS TRUE, v082 AS TRUE, v083 AS TRUE, v084 AS TRUE, v085 AS TRUE, v086 AS TRUE, v087 AS TRUE, v088 AS TRUE, v089 AS TRUE, v090 AS TRUE, v091 AS TRUE, v092 AS TRUE, v093 AS TRUE, v094 AS TRUE, v095 AS TRUE, v096 AS TRUE, v097 AS TRUE, v098 AS TRUE, v099 AS TRUE, v100 AS TRUE, v101 AS TRUE, v102 AS TRUE, v103 AS TRUE, v104 AS TRUE, v105 AS TRUE, v106 AS TRUE, v107 AS TRUE, v108 AS TRUE, v109 AS TRUE, v110 AS TRUE, v111 AS TRUE, v112 AS TRUE, v113 AS TRUE, v114 AS TRUE, v115 AS TRUE, v116 AS TRUE, v117 AS TRUE, v118 AS TRUE, v119 AS TRUE, v120 AS TRUE, v121 AS TRUE, v122 AS TRUE, v123 AS TRUE, v124 AS TRUE, v125 AS TRUE, v126 AS TRUE, v127 AS TRUE, v128 AS TRUE, v129 AS TRUE, v130 AS TRUE, v131 AS TRUE, v132 AS TRUE, v133 AS TRUE, v134 AS TRUE, v135 AS TRUE, v136 AS TRUE, v137 AS TRUE, v138 AS TRUE, v139 AS TRUE, v140 AS TRUE, v141 AS TRUE, v142 AS TRUE, v143 AS TRUE, v144 AS TRUE, v145 AS TRUE, v146 AS TRUE, v147 AS TRUE, v148 AS TRUE, v149 AS TRUE, v150 AS TRUE, v151 AS TRUE, v152 AS TRUE, v153 AS TRUE, v154 AS TRUE, v155 AS TRUE, v156 AS TRUE, v157 AS TRUE, v158 AS TRUE, v159 AS TRUE, v160 AS TRUE, v161 AS TRUE, v162 AS TRUE, v163 AS TRUE, v164 AS TRUE, v165 AS TRUE, v166 AS TRUE, v167 AS TRUE, v168 AS TRUE, v169 AS TRUE, v170 AS 
TRUE, v171 AS TRUE, v172 AS TRUE, v173 AS TRUE, v174 AS TRUE, v175 AS TRUE, v176 AS TRUE, v177 AS TRUE, v178 AS TRUE, v179 AS TRUE, v180 AS TRUE, v181 AS TRUE, v182 AS TRUE, v183 AS TRUE, v184 AS TRUE, v185 AS TRUE, v186 AS TRUE, v187 AS TRUE, v188 AS TRUE, v189 AS TRUE, v190 AS TRUE, v191 AS TRUE, v192 AS TRUE, v193 AS TRUE, v194 AS TRUE, v195 AS TRUE, v196 AS TRUE, v197 AS TRUE, v198 AS TRUE, v199 AS TRUE, v200 AS TRUE, v201 AS TRUE, v202 AS TRUE, v203 AS TRUE, v204 AS TRUE, v205 AS TRUE, v206 AS TRUE, v207 AS TRUE, v208 AS TRUE, v209 AS TRUE, v210 AS TRUE, v211 AS TRUE, v212 AS TRUE, v213 AS TRUE, v214 AS TRUE, v215 AS TRUE, v216 AS TRUE, v217 AS TRUE, v218 AS TRUE, v219 AS TRUE, v220 AS TRUE, v221 AS TRUE, v222 AS TRUE, v223 AS TRUE, v224 AS TRUE, v225 AS TRUE, v226 AS TRUE, v227 AS TRUE, v228 AS TRUE, v229 AS TRUE, v230 AS TRUE, v231 AS TRUE, v232 AS TRUE, v233 AS TRUE, v234 AS TRUE, v235 AS TRUE, v236 AS TRUE, v237 AS TRUE, v238 AS TRUE, v239 AS TRUE, v240 AS TRUE, v241 AS TRUE, v242 AS TRUE, v243 AS TRUE, v244 AS TRUE, v245 AS TRUE, v246 AS TRUE, v247 AS TRUE, v248 AS TRUE, v249 AS TRUE, v250 AS TRUE, v251 AS TRUE, v252 AS TRUE, v253 AS TRUE)" +PL/pgSQL function inline_code_block line 17 at EXECUTE + CREATE TEMP TABLE stock_null (company TEXT, tdate DATE, price INTEGER); + INSERT INTO stock_null VALUES ('c1', '2023-07-01', 100); + INSERT INTO stock_null VALUES ('c1', '2023-07-02', NULL); -- NULL in middle + INSERT INTO stock_null VALUES ('c1', '2023-07-03', 200); + INSERT INTO stock_null VALUES ('c1', '2023-07-04', 150); + SELECT company, tdate, price, count(*) OVER w AS match_count + FROM stock_null + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (START UP DOWN) + DEFINE START AS TRUE, UP AS price > PREV(price), DOWN AS price < +PREV(price) + ); + company | tdate | price | match_count +---------+------------+-------+------------- + c1 | 07-01-2023 | 100 | 0 + c1 | 07-02-2023 | | 0 + c1 | 
07-03-2023 | 200 | 0 + c1 | 07-04-2023 | 150 | 0 +(4 rows) + +-- Overlapping match tests (requires multi-context for correct behavior) +-- Using array flags: 'X' = ANY(flags) for multi-TRUE support +-- Test 1: A B C D E | B C D | C D E F - three overlapping patterns +-- Different end points: B C D (4), A B C D E (5), C D E F (6) +WITH test_overlap1 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E | B C D | C D E F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {E} | | + 6 | {F} | | +(6 rows) + +WITH test_overlap1 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D E | B C D | C D E F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {B} | 2 | 4 + 3 | {C} | 3 | 6 + 4 | {D} | | + 5 | {E} | | + 6 | {F} | | +(6 rows) + +-- PAST LAST: only one match +-- TO NEXT ROW with multi-context: three 
matches +-- Row 1: A B C D E (1-5) +-- Row 2: B C D (2-4) <- ends first! +-- Row 3: C D E F (3-6) <- ends last! +-- Test 1b: Longer pattern FAILS, shorter pattern should survive +-- Pattern: A+ B C D E | B+ C +-- A+ B C D E fails (no E found in sequence) +-- B+ C matches at rows 2-3 +-- Result: match 2-3 (B+ C) +WITH test_overlap1b AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1b +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C D E | B+ C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {B} | 2 | 3 + 3 | {C} | | + 4 | {D} | | + 5 | {X} | | +(5 rows) + +-- Test 2: A B+ C | B+ D - long B sequence with different endings +WITH test_overlap2 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['B']), + (8, ARRAY['B']), + (9, ARRAY['B']), + (10, ARRAY['D']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap2 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+ C | B+ D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {B} | | + 4 | {B} | | + 5 | {B} | | + 6 | {C} | | + 7 | {B} | 7 | 10 + 8 | {B} | 8 | 10 + 9 | {B} | 9 | 10 + 10 | {D} | | +(10 rows) + +-- Current 
result (correct): +-- Row 1: A B+ C (1-6) +-- Row 7-9: B+ D (7-10, 8-10, 9-10) +-- Note: Row 2-6 cannot match B+ D because Row 6 is C, not D +-- With absorption: 8-10 and 9-10 would be absorbed by 7-10 (earlier context covers later) +-- Test 3: Greedy quantifier with late failure - A B C+ D | A B +-- Pattern expects D after C+, but E comes instead ("betrayal") +WITH test_betrayal AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['C']), + (5, ARRAY['C']), + (6, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_betrayal +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C+ D | A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {B} | | + 3 | {C} | | + 4 | {C} | | + 5 | {C} | | + 6 | {E} | | +(6 rows) + +-- A B C+ D fails at Row 6 (E instead of D) +-- Question: Does it fallback to A B (1-2)? 
+-- Test 4: Lexical Order test - A B C | A B C D E +-- SQL standard: first matching alternative wins +WITH test_lexical AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_lexical +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C | A B C D E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {E} | | +(5 rows) + +-- SQL standard Lexical Order: A B C (1-3) wins (first alternative) +-- Test 4b: Reversed pattern order - A B C D E | A B C +WITH test_lexical2 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_lexical2 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E | A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {E} | | +(5 rows) + +-- SQL standard Lexical Order: A B C D E (1-5) wins (first alternative) +-- Test 5: Multiple TRUE in single row (overlapping pattern variables) +-- Each row matches multiple DEFINE conditions simultaneously +WITH test_multi_true AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), -- A and B both TRUE + (2, ARRAY['B','C']), 
-- B and C both TRUE + (3, ARRAY['C','D']), -- C and D both TRUE + (4, ARRAY['D','E']), -- D and E both TRUE + (5, ARRAY['E','_']) -- E only + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_multi_true +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 5 + 2 | {B,C} | | + 3 | {C,D} | | + 4 | {D,E} | | + 5 | {E,_} | | +(5 rows) + +-- Row 1: A=T, B=T -> matches A +-- Row 2: B=T, C=T -> matches B +-- Row 3: C=T, D=T -> matches C +-- Row 4: D=T, E=T -> matches D +-- Row 5: E=T -> matches E +-- Result: match 1-5 (A B C D E) +-- Test 6: Diagonal pattern with multi-TRUE (shifted overlap) +WITH test_diagonal AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['B','A']), + (3, ARRAY['C','B']), + (4, ARRAY['D','C']), + (5, ARRAY['_','D']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_diagonal +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 4 + 2 | {B,A} | 2 | 5 + 3 | {C,B} | | + 4 | {D,C} | | + 5 | {_,D} | | +(5 rows) + +-- Possible matches: +-- Start Row 1: A(1) B(2) C(3) D(4) -> 1-4 +-- Start Row 2: A(2) B(3) C(4) D(5) -> 2-5 (because Row 2 has A too!) 
+-- =================================================================== +-- Context Absorption Tests +-- =================================================================== +-- Test absorption 1: Basic A+ pattern - later contexts absorbed by earlier +WITH test_absorb_basic AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_basic +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {A} | 4 | 4 + 5 | {B} | | +(5 rows) + +-- Pattern A+ is absorbable (unbounded first element, only one unbounded) +-- 4 matches: (1-4, 2-4, 3-4, 4-4) +-- Test absorption 2: A+ B pattern - absorption with fixed suffix +WITH test_absorb_suffix AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_suffix +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | + 5 | {X} | | +(5 rows) + +-- Pattern A+ B is absorbable (A+ unbounded first, B bounded suffix) +-- All potential matches end at same row (row 4 with B) +-- 3 matches: (1-4, 2-4, 3-4) +-- Test absorption 3: Per-branch absorption with ALT (B+ C | B+ D) +WITH test_absorb_alt AS ( + SELECT * FROM (VALUES + (1, 
ARRAY['B']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['D']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (B+ C | B+ D) + DEFINE + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | 1 | 4 + 2 | {B} | 2 | 4 + 3 | {B} | 3 | 4 + 4 | {D} | | + 5 | {X} | | +(5 rows) + +-- Both branches B+ C and B+ D are absorbable (B+ unbounded first) +-- B+ D branch matches: 3 matches (1-4, 2-4, 3-4) +-- Test absorption 4: Non-absorbable pattern (A B+ - unbounded not first) +WITH test_no_absorb AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_no_absorb +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {B} | | + 3 | {B} | | + 4 | {B} | | + 5 | {X} | | +(5 rows) + +-- Pattern A B+ is NOT absorbable (A bounded first, B+ unbounded but not first) +-- Only Row 1 can start match (only row with A), so only one match: 1-4 +-- Test absorption 5: GROUP merge enables absorption +WITH test_absorb_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_group +WINDOW w AS ( 
+ ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B) (A B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {A} | 3 | 6 + 4 | {B} | | + 5 | {A} | | + 6 | {B} | | + 7 | {X} | | +(7 rows) + +-- Pattern optimized: (A B) (A B)+ -> (A B){2,} +-- 2 matches: 1-6 (3 reps), 3-6 (2 reps) +-- Test absorption 6: Multiple unbounded - first element unbounded enables absorption +WITH test_multi_unbounded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_multi_unbounded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {B} | | + 4 | {B} | | + 5 | {X} | | +(5 rows) + +-- 2 matches: 1-4, 2-4 (same endpoint 4) +-- ============================================ +-- Jacob's RPR Patterns (from jacob branch) +-- ============================================ +-- Test: A? (optional, greedy) +WITH jacob_optional AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_optional +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A?) 
+ DEFINE A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 1 + 2 | {X} | | +(2 rows) + +-- Expected: 1-1 (matches A) +-- Test: A{2} (exact count) +WITH jacob_exact AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_exact +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2}) + DEFINE A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {A} | | + 3 | {A} | | + 4 | {X} | | +(4 rows) + +-- Expected: 1-2 +-- Test: A{1,3} (bounded range, greedy) +WITH jacob_bounded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_bounded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{1,3}) + DEFINE A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | | + 3 | {A} | | + 4 | {A} | 4 | 4 + 5 | {X} | | +(5 rows) + +-- Expected: 1-3 (greedy takes max), then 4-4 +-- Test: A | B (simple alternation) +WITH jacob_simple_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_simple_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A | B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags 
| match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 1 + 2 | {B} | 2 | 2 + 3 | {X} | | +(3 rows) + +-- Expected: 1-1 (A), 2-2 (B) +-- Test: A | B | C (three-way alternation) +WITH jacob_three_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_three_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A | B | C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | 1 | 1 + 2 | {X} | | +(2 rows) + +-- Expected: 1-1 (B) +-- Test: A B C (concatenation) +WITH jacob_concat AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_concat +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {C} | | + 4 | {X} | | +(4 rows) + +-- Expected: 1-3 +-- Test: A B? C (optional middle) +WITH jacob_optional_mid AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), + (3, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_optional_mid +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B? 
C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {C} | | + 3 | {X} | | +(3 rows) + +-- Expected: 1-2 (A C, B skipped) +-- Test: (A B){2} (nested group with quantifier) +WITH jacob_nested_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_nested_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B){2}) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {B} | | + 3 | {A} | | + 4 | {B} | | + 5 | {X} | | +(5 rows) + +-- Expected: 1-4 (A B A B) +-- Test: (A){3} (quantifier on grouped single element) +WITH jacob_group_quant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_group_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A){3}) + DEFINE A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | | + 3 | {A} | | + 4 | {X} | | +(4 rows) + +-- Expected: 1-3 +-- Test: A B C | A B C D E (lexical order - first alt wins) +WITH jacob_lex_first AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_lex_first 
+WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C | A B C D E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {E} | | +(5 rows) + +-- Expected: 1-3 (A B C wins by lexical order) +-- Test: A B C D E | A B C (lexical order - longer first wins) +WITH jacob_lex_long AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_lex_long +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E | A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {E} | | +(5 rows) + +-- Expected: 1-5 (A B C D E wins by lexical order) +-- ============================================ +-- Alternation with quantifiers (BUG cases from Jacob's tests) +-- ============================================ +-- Test: (A | B)+ C - alternation inside quantified group followed by C +WITH jacob_alt_quant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_alt_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE + A AS 'A' = 
ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {B} | | + 3 | {A} | | + 4 | {C} | | +(4 rows) + +-- Expected: 1-4 (A B A C) +-- Test: ((A | B) C)+ - alternation inside group with outer quantifier +WITH jacob_alt_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), + (3, ARRAY['B']), + (4, ARRAY['C']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_alt_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A | B) C)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {C} | | + 3 | {B} | | + 4 | {C} | | + 5 | {X} | | +(5 rows) + +-- Expected: 1-4 (A C B C) +-- ============================================ +-- RELUCTANT quantifiers (not yet supported) +-- ============================================ +-- Test: A+? B (reluctant) - parser rejects with ERROR +WITH jacob_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); +ERROR: reluctant quantifiers are not yet supported +LINE 15: PATTERN (A+? B) + ^ +-- Expected: ERROR (reluctant quantifiers not yet supported) +-- Test: A{1,3}? 
B (reluctant bounded) - parser rejects with ERROR +WITH jacob_reluctant_bounded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_reluctant_bounded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{1,3}? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); +ERROR: reluctant quantifiers are not yet supported +LINE 15: PATTERN (A{1,3}? B) + ^ +-- Expected: ERROR (reluctant quantifiers not yet supported) +-- ============================================ +-- Nested quantifiers (pathological patterns) +-- ============================================ +-- These patterns previously caused segfault or infinite loop. +-- Now they are either optimized at compile time or handled safely at runtime. +-- Test: (A*)* - nested unbounded quantifiers (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A*)*) + DEFINE A AS TRUE +); + v | c +---+--- + 1 | 5 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 +(5 rows) + +-- Test: (A*)+ - inner nullable, outer requires one (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A*)+) + DEFINE A AS TRUE +); + v | c +---+--- + 1 | 5 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 +(5 rows) + +-- Test: (A+)* - outer nullable (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A+)*) + DEFINE A AS TRUE +); + v | c +---+--- + 1 | 5 + 2 | 0 + 3 | 0 + 4 | 
0 + 5 | 0 +(5 rows) + +-- Test: (A+)+ - both require match (optimized to A+) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A+)+) + DEFINE A AS TRUE +); + v | c +---+--- + 1 | 5 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 +(5 rows) + +-- Test: (A* B*)* - complex nested pattern (runtime protection) +-- Not optimized but handled safely by empty-match loop prevention +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A* B*)*) + DEFINE A AS TRUE, B AS TRUE +); + v | c +---+--- + 1 | 5 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 +(5 rows) + +-- Test: (((A)*)*)* - triple nested (optimized through recursive optimization) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 3) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((((A)*)*)*) + DEFINE A AS TRUE +); + v | c +---+--- + 1 | 3 + 2 | 0 + 3 | 0 +(3 rows) + diff --git a/src/test/regress/expected/rpr_base.out b/src/test/regress/expected/rpr_base.out new file mode 100644 index 00000000000..ec67a099ee6 --- /dev/null +++ b/src/test/regress/expected/rpr_base.out @@ -0,0 +1,5538 @@ +-- ============================================================ +-- RPR Base Tests +-- Tests for Row Pattern Recognition (ISO/IEC 19075-5:2016) +-- ============================================================ +-- +-- Parser Layer: +-- Keyword Usage Tests +-- DEFINE Clause Tests +-- FRAME Options Tests +-- PARTITION BY + FRAME Tests +-- PATTERN Syntax Tests +-- Quantifiers Tests +-- Navigation Functions Tests +-- SKIP TO / INITIAL Tests +-- Serialization/Deserialization Tests +-- Error Cases Tests +-- +-- Planner Layer: +-- Pattern Optimization Tests +-- Absorption Flag Display Tests +-- Absorption 
Analysis Tests +-- Edge Case Tests +-- Optimization Fallback Tests +-- Planner Integration Tests +-- Subquery and CTE Tests +-- JOIN Tests +-- Complex Expression Tests +-- Set Operations Tests +-- Sorting and Grouping Tests +-- Stress Tests +-- Error Limit Tests +-- +-- Contributed Tests: +-- Jacob's Patterns +-- Pathological Patterns +-- ============================================================ +SET client_min_messages = WARNING; +-- ============================================================ +-- Keyword Usage Tests +-- ============================================================ +-- RPR keywords as column names +-- Keywords: define, initial, past, pattern, seek +CREATE TABLE rpr_keywords ( + id INT, + define INT, -- DEFINE keyword + initial INT, -- INITIAL keyword + past INT, -- PAST keyword + pattern INT, -- PATTERN keyword + seek INT, -- SEEK keyword + skip INT -- SKIP keyword (pre-existing) +); +INSERT INTO rpr_keywords VALUES (1, 10, 20, 30, 40, 50, 60); +SELECT id, define, initial, past, pattern, seek, skip +FROM rpr_keywords +ORDER BY id; + id | define | initial | past | pattern | seek | skip +----+--------+---------+------+---------+------+------ + 1 | 10 | 20 | 30 | 40 | 50 | 60 +(1 row) + +DROP TABLE rpr_keywords; +-- ============================================================ +-- DEFINE Clause Tests +-- ============================================================ +-- Simple column references +CREATE TABLE stock_price ( + dt DATE, + symbol TEXT, + price NUMERIC, + volume INT +); +INSERT INTO stock_price VALUES + ('2024-01-01', 'AAPL', 150, 1000), + ('2024-01-02', 'AAPL', 155, 1200), + ('2024-01-03', 'AAPL', 152, 900), + ('2024-01-04', 'AAPL', 160, 1500), + ('2024-01-05', 'AAPL', 158, 1100); +-- Simple column reference +SELECT dt, price, COUNT(*) OVER w as cnt +FROM stock_price +WINDOW w AS ( + PARTITION BY symbol + ORDER BY dt + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (UP+) + DEFINE UP AS price > 150 +) +ORDER BY dt; + dt | price 
| cnt +------------+-------+----- + 01-01-2024 | 150 | 0 + 01-02-2024 | 155 | 4 + 01-03-2024 | 152 | 0 + 01-04-2024 | 160 | 0 + 01-05-2024 | 158 | 0 +(5 rows) + +-- Multiple column references +SELECT dt, price, volume, COUNT(*) OVER w as cnt +FROM stock_price +WINDOW w AS ( + PARTITION BY symbol + ORDER BY dt + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (GOOD+) + DEFINE GOOD AS price > 150 AND volume > 1000 +) +ORDER BY dt; + dt | price | volume | cnt +------------+-------+--------+----- + 01-01-2024 | 150 | 1000 | 0 + 01-02-2024 | 155 | 1200 | 1 + 01-03-2024 | 152 | 900 | 0 + 01-04-2024 | 160 | 1500 | 2 + 01-05-2024 | 158 | 1100 | 0 +(5 rows) + +-- Expression in DEFINE +SELECT dt, price, COUNT(*) OVER w as cnt +FROM stock_price +WINDOW w AS ( + PARTITION BY symbol + ORDER BY dt + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (HIGH+) + DEFINE HIGH AS price * 1.1 > 165 +) +ORDER BY dt; + dt | price | cnt +------------+-------+----- + 01-01-2024 | 150 | 0 + 01-02-2024 | 155 | 4 + 01-03-2024 | 152 | 0 + 01-04-2024 | 160 | 0 + 01-05-2024 | 158 | 0 +(5 rows) + +-- Arithmetic and functions +SELECT dt, price, volume, COUNT(*) OVER w as cnt +FROM stock_price +WINDOW w AS ( + PARTITION BY symbol + ORDER BY dt + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (CALC+) + DEFINE CALC AS (price + volume / 100) > 160 +) +ORDER BY dt; + dt | price | volume | cnt +------------+-------+--------+----- + 01-01-2024 | 150 | 1000 | 0 + 01-02-2024 | 155 | 1200 | 4 + 01-03-2024 | 152 | 900 | 0 + 01-04-2024 | 160 | 1500 | 0 + 01-05-2024 | 158 | 1100 | 0 +(5 rows) + +DROP TABLE stock_price; +-- Auto-generated DEFINE +CREATE TABLE rpr_auto (id INT, val INT); +INSERT INTO rpr_auto VALUES (1, 10), (2, 20), (3, 30), (4, 15); +-- One variable undefined (B auto-generated as "B IS TRUE") +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_auto +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ B*) + DEFINE A AS val > 
15 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 3 + 3 | 30 | 0 + 4 | 15 | 0 +(4 rows) + +-- Multiple undefined variables +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_auto +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B C) + DEFINE A AS val > 0 + -- B and C auto-generated as "B IS TRUE", "C IS TRUE" +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 15 | 0 +(4 rows) + +-- All variables defined explicitly +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_auto +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (X Y Z) + DEFINE + X AS val > 10, + Y AS val > 20, + Z AS val < 20 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 3 + 3 | 30 | 0 + 4 | 15 | 0 +(4 rows) + +DROP TABLE rpr_auto; +-- Duplicate variable names +CREATE TABLE rpr_dup (id INT); +INSERT INTO rpr_dup VALUES (1), (2); +-- Duplicate DEFINE entries +SELECT COUNT(*) OVER w +FROM rpr_dup +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS id > 0, A AS id < 10 +); +ERROR: row pattern definition variable name "a" appears more than once in DEFINE clause +LINE 7: DEFINE A AS id > 0, A AS id < 10 + ^ +-- Expected: ERROR: row pattern definition variable name "a" appears more than once in DEFINE clause +DROP TABLE rpr_dup; +-- Boolean coercion +CREATE TABLE rpr_bool (id INT, flag BOOLEAN); +INSERT INTO rpr_bool VALUES (1, true), (2, false); +-- Non-boolean expression +SELECT COUNT(*) OVER w +FROM rpr_bool +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS id +); +ERROR: argument of DEFINE must be type boolean, not type integer +LINE 7: DEFINE A AS id + ^ +-- Expected: ERROR: argument of DEFINE must be type boolean +-- Boolean column reference +SELECT id, flag, COUNT(*) OVER w as cnt +FROM rpr_bool +WINDOW w AS 
( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (T+) + DEFINE T AS flag +) +ORDER BY id; + id | flag | cnt +----+------+----- + 1 | t | 1 + 2 | f | 0 +(2 rows) + +-- NULL::boolean +SELECT id, COUNT(*) OVER w as cnt +FROM rpr_bool +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (N+) + DEFINE N AS NULL::boolean +) +ORDER BY id; + id | cnt +----+----- + 1 | 0 + 2 | 0 +(2 rows) + +DROP TABLE rpr_bool; +-- Complex expressions +CREATE TABLE rpr_complex (id INT, val1 INT, val2 INT); +INSERT INTO rpr_complex VALUES (1, 10, 20), (2, 15, 25), (3, 20, 30); +-- CASE expression +SELECT id, val1, val2, COUNT(*) OVER w as cnt +FROM rpr_complex +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (C+) + DEFINE C AS CASE WHEN val1 > 10 THEN val2 > 20 ELSE false END +) +ORDER BY id; + id | val1 | val2 | cnt +----+------+------+----- + 1 | 10 | 20 | 0 + 2 | 15 | 25 | 2 + 3 | 20 | 30 | 0 +(3 rows) + +DROP TABLE rpr_complex; +-- Pattern variable not in PATTERN (should be ignored) +CREATE TABLE rpr_unused (id INT); +INSERT INTO rpr_unused VALUES (1), (2); +-- Extra DEFINE variable +SELECT id, COUNT(*) OVER w as cnt +FROM rpr_unused +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS id > 0, B AS id > 5 -- B not in pattern +) +ORDER BY id; + id | cnt +----+----- + 1 | 2 + 2 | 0 +(2 rows) + +DROP TABLE rpr_unused; +-- ============================================================ +-- FRAME Options Tests +-- ============================================================ +CREATE TABLE rpr_frame (id INT, val INT); +INSERT INTO rpr_frame VALUES + (1, 10), (2, 10), (3, 10), -- Same val: 10 + (4, 20), (5, 20), -- Same val: 20 + (6, 30); +-- Valid frame options +-- ROWS: counts physical rows (1 FOLLOWING = next 1 physical row) +-- Expected result: Each row can see 1 physical row ahead +-- id=1,2,3 (val=10): can see next row -> cnt=2 +-- 
id=4,5 (val=20): can see next row -> cnt=2 +-- id=6 (val=30): no next row -> cnt=1 +-- Result: [2,2,2,2,2,1] +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY val + ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B?) + DEFINE A AS val >= 0, B AS val >= 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 10 | 2 + 3 | 10 | 2 + 4 | 20 | 2 + 5 | 20 | 2 + 6 | 30 | 1 +(6 rows) + +-- Invalid frame start positions +-- Not starting at CURRENT ROW +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: FRAME must start at CURRENT ROW when row pattern recognition is used +LINE 5: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ^ +DETAIL: Current frame starts with UNBOUNDED PRECEDING. +HINT: Use: ROWS BETWEEN CURRENT ROW AND ... +-- Expected: ERROR: FRAME must start at current row when row pattern recognition is used +-- EXCLUDE options +-- EXCLUDE not permitted +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + EXCLUDE CURRENT ROW + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: EXCLUDE options are not permitted when row pattern recognition is used +LINE 6: EXCLUDE CURRENT ROW + ^ +DETAIL: Frame definition includes EXCLUDE CURRENT ROW. +HINT: Remove the EXCLUDE clause from the window definition. +-- Expected: ERROR: EXCLUDE options are not permitted when row pattern recognition is used +-- EXCLUDE GROUP not permitted +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + EXCLUDE GROUP + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: EXCLUDE options are not permitted when row pattern recognition is used +LINE 6: EXCLUDE GROUP + ^ +DETAIL: Frame definition includes EXCLUDE GROUP. 
+HINT: Remove the EXCLUDE clause from the window definition. +-- Expected: ERROR: EXCLUDE options are not permitted when row pattern recognition is used +-- EXCLUDE TIES not permitted +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + EXCLUDE TIES + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: EXCLUDE options are not permitted when row pattern recognition is used +LINE 6: EXCLUDE TIES + ^ +DETAIL: Frame definition includes EXCLUDE TIES. +HINT: Remove the EXCLUDE clause from the window definition. +-- Expected: ERROR: EXCLUDE options are not permitted when row pattern recognition is used +-- RANGE frame not starting at CURRENT ROW +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: FRAME option RANGE is not permitted when row pattern recognition is used +LINE 5: RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWIN... + ^ +HINT: Use: ROWS instead +-- Expected: ERROR: FRAME option RANGE is not permitted when row pattern recognition is used +-- GROUPS frame not starting at CURRENT ROW +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: FRAME option GROUP is not permitted when row pattern recognition is used +LINE 5: GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWI... 
+ ^ +HINT: Use: ROWS instead +-- Expected: ERROR: FRAME option GROUP is not permitted when row pattern recognition is used +-- Starting with N PRECEDING +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: FRAME must start at CURRENT ROW when row pattern recognition is used +LINE 5: ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING + ^ +DETAIL: Current frame starts with offset PRECEDING. +HINT: Use: ROWS BETWEEN CURRENT ROW AND ... +-- Expected: ERROR: FRAME must start at current row when row pattern recognition is used +-- Starting with N FOLLOWING +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: FRAME must start at CURRENT ROW when row pattern recognition is used +LINE 5: ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING + ^ +DETAIL: Current frame starts with offset FOLLOWING. +HINT: Use: ROWS BETWEEN CURRENT ROW AND ... 
+-- Expected: ERROR: FRAME must start at current row when row pattern recognition is used +-- Frame end bound edge cases +-- End before start: CURRENT ROW AND 1 PRECEDING +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 1 PRECEDING + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: frame starting from current row cannot have preceding rows +LINE 5: ROWS BETWEEN CURRENT ROW AND 1 PRECEDING + ^ +-- Expected: ERROR: frame starting from current row cannot have preceding rows +-- End before start: CURRENT ROW AND UNBOUNDED PRECEDING +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED PRECEDING + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: frame end cannot be UNBOUNDED PRECEDING +LINE 5: ROWS BETWEEN CURRENT ROW AND UNBOUNDED PRECEDING + ^ +-- Expected: ERROR: frame end cannot be UNBOUNDED PRECEDING +-- Single row frame: CURRENT ROW AND CURRENT ROW +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND CURRENT ROW + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 1 + 2 | 10 | 1 + 3 | 10 | 1 + 4 | 20 | 1 + 5 | 20 | 1 + 6 | 30 | 1 +(6 rows) + +-- Zero offset: CURRENT ROW AND 0 FOLLOWING (equivalent to CURRENT ROW) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 0 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 1 + 2 | 10 | 1 + 3 | 10 | 1 + 4 | 20 | 1 + 5 | 20 | 1 + 6 | 30 | 1 +(6 rows) + +-- Large offset: CURRENT ROW AND 1000 FOLLOWING +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 1000 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt 
+----+-----+----- + 1 | 10 | 6 + 2 | 10 | 5 + 3 | 10 | 4 + 4 | 20 | 3 + 5 | 20 | 2 + 6 | 30 | 1 +(6 rows) + +-- Maximum offset: CURRENT ROW AND 2147483646 FOLLOWING (INT_MAX - 1) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2147483646 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 6 + 2 | 10 | 5 + 3 | 10 | 4 + 4 | 20 | 3 + 5 | 20 | 2 + 6 | 30 | 1 +(6 rows) + +-- RANGE frame with RPR (not permitted) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY val + RANGE BETWEEN CURRENT ROW AND 10 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B?) + DEFINE A AS val >= 0, B AS val >= 0 +) +ORDER BY id; +ERROR: FRAME option RANGE is not permitted when row pattern recognition is used +LINE 5: RANGE BETWEEN CURRENT ROW AND 10 FOLLOWING + ^ +HINT: Use: ROWS instead +-- Expected: ERROR: FRAME option RANGE is not permitted when row pattern recognition is used +-- GROUPS frame with RPR (not permitted) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY val + GROUPS BETWEEN CURRENT ROW AND 1 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B?) 
+ DEFINE A AS val >= 0, B AS val >= 0 +) +ORDER BY id; +ERROR: FRAME option GROUP is not permitted when row pattern recognition is used +LINE 5: GROUPS BETWEEN CURRENT ROW AND 1 FOLLOWING + ^ +HINT: Use: ROWS instead +-- Expected: ERROR: FRAME option GROUP is not permitted when row pattern recognition is used +DROP TABLE rpr_frame; +-- ============================================================ +-- PARTITION BY + FRAME Tests +-- ============================================================ +-- Test PARTITION BY with RPR to ensure proper partitioning behavior +CREATE TABLE rpr_partition (id INT, grp INT, val INT); +INSERT INTO rpr_partition VALUES + (1, 1, 10), (2, 1, 20), (3, 1, 30), + (4, 2, 15), (5, 2, 25), (6, 2, 35); +-- PARTITION BY with ROWS frame +SELECT id, grp, val, COUNT(*) OVER w as cnt +FROM rpr_partition +WINDOW w AS ( + PARTITION BY grp + ORDER BY val + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+) + DEFINE A AS val >= 10, B AS val > 15 +) +ORDER BY id; + id | grp | val | cnt +----+-----+-----+----- + 1 | 1 | 10 | 3 + 2 | 1 | 20 | 2 + 3 | 1 | 30 | 0 + 4 | 2 | 15 | 3 + 5 | 2 | 25 | 2 + 6 | 2 | 35 | 0 +(6 rows) + +-- Expected: Pattern matching should reset for each partition +-- PARTITION BY with RANGE frame +SELECT id, grp, val, COUNT(*) OVER w as cnt +FROM rpr_partition +WINDOW w AS ( + PARTITION BY grp + ORDER BY val + RANGE BETWEEN CURRENT ROW AND 10 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B?) 
+ DEFINE A AS val >= 10, B AS val >= 20 +) +ORDER BY id; +ERROR: FRAME option RANGE is not permitted when row pattern recognition is used +LINE 6: RANGE BETWEEN CURRENT ROW AND 10 FOLLOWING + ^ +HINT: Use: ROWS instead +-- Expected: ERROR: FRAME option RANGE is not permitted when row pattern recognition is used +DROP TABLE rpr_partition; +-- ============================================================ +-- PATTERN Syntax Tests +-- ============================================================ +CREATE TABLE rpr_pattern (id INT, val INT); +INSERT INTO rpr_pattern VALUES + (1, 5), (2, 10), (3, 15), (4, 20), (5, 25), + (6, 30), (7, 35), (8, 40), (9, 45), (10, 50); +-- Alternation (|) +-- Multiple alternatives +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ | B+ | C+) + DEFINE A AS val > 35, B AS val BETWEEN 15 AND 35, C AS val < 15 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 5 | 2 + 2 | 10 | 0 + 3 | 15 | 5 + 4 | 20 | 0 + 5 | 25 | 0 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 40 | 3 + 9 | 45 | 0 + 10 | 50 | 0 +(10 rows) + +-- Grouping +-- Nested grouping with quantifier +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B) C)+) + DEFINE A AS val > 10, B AS val > 20, C AS val > 30 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 5 | 0 + 2 | 10 | 0 + 3 | 15 | 0 + 4 | 20 | 0 + 5 | 25 | 6 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 40 | 0 + 9 | 45 | 0 + 10 | 50 | 0 +(10 rows) + +-- Sequence +-- Multi-element sequence +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B C D E) + DEFINE + A AS val < 15, + B AS val BETWEEN 15 AND 25, + C AS val BETWEEN 25 AND 35, + D AS val BETWEEN 35 AND 45, + E AS val >= 45 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 5 | 0 + 2 | 10 
| 0 + 3 | 15 | 0 + 4 | 20 | 0 + 5 | 25 | 0 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 40 | 0 + 9 | 45 | 0 + 10 | 50 | 0 +(10 rows) + +-- Complex combinations +-- Alternation with grouping +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B) | (C D)) + DEFINE A AS val < 20, B AS val >= 20, C AS val < 30, D AS val >= 30 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 5 | 0 + 2 | 10 | 0 + 3 | 15 | 2 + 4 | 20 | 0 + 5 | 25 | 2 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 40 | 0 + 9 | 45 | 0 + 10 | 50 | 0 +(10 rows) + +-- Alternation + sequence + grouping +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (START (UP{2,} DOWN? | FLAT+) FINISH) + DEFINE + START AS val >= 0, + UP AS val > 20, + DOWN AS val <= 30, + FLAT AS val BETWEEN 25 AND 35, + FINISH AS val > 40 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 5 | 0 + 2 | 10 | 0 + 3 | 15 | 0 + 4 | 20 | 7 + 5 | 25 | 0 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 40 | 0 + 9 | 45 | 0 + 10 | 50 | 0 +(10 rows) + +-- Nested alternation in groups +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) (C | D)) + DEFINE A AS val < 15, B AS val BETWEEN 15 AND 25, C AS val BETWEEN 25 AND 35, D AS val > 35 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 5 | 0 + 2 | 10 | 0 + 3 | 15 | 0 + 4 | 20 | 2 + 5 | 25 | 0 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 40 | 0 + 9 | 45 | 0 + 10 | 50 | 0 +(10 rows) + +DROP TABLE rpr_pattern; +-- ============================================================ +-- Quantifiers Tests +-- ============================================================ +CREATE TABLE rpr_quant (id INT, val INT); +INSERT INTO rpr_quant VALUES + (1, 10), (2, 20), (3, 30), (4, 40), (5, 50), + (6, 60), (7, 70), (8, 80), (9, 90), (10, 100); +-- Basic greedy 
quantifiers +-- * (zero or more) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A*) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 10 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- + (one or more) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 5 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- ? (zero or one) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A?) + DEFINE A AS val = 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 1 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Edge case quantifiers +-- {0} is not allowed (min must be >= 1) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{0} B) + DEFINE A AS val > 1000, B AS val > 0 +) +ORDER BY id; +ERROR: quantifier bound must be between 1 and 2147483646 +LINE 6: PATTERN (A{0} B) + ^ +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 +-- {0,0} is not allowed (max must be >= 1) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{0,0} B) + DEFINE A AS val > 1000, B AS val > 0 +) +ORDER BY id; +ERROR: quantifier bounds must be between 0 and 2147483646 with max >= 1 +LINE 6: PATTERN (A{0,0} B) + ^ +-- Expected: 
ERROR: quantifier bounds must be between 0 and 2147483646 with max >= 1 +-- {0,1} (equivalent to ?) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{0,1}) + DEFINE A AS val = 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 1 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Exact quantifiers {n} +-- {3} (representative exact quantifier) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{3}) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 3 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 3 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Range quantifiers {n,} +-- {2,} (representative n or more) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2,}) + DEFINE A AS val > 40 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 6 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Upper bound quantifiers {,m} +-- {,3} (representative up to m) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,3}) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 3 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 3 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 1 +(10 rows) + +-- Range quantifiers {n,m} +-- {3,7} (representative range) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN 
(A{3,7}) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 7 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 3 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +DROP TABLE rpr_quant; +-- Reluctant quantifiers (not yet supported) +CREATE TABLE rpr_reluctant (id INT, val INT); +INSERT INTO rpr_reluctant VALUES (1, 10), (2, 20), (3, 30); +-- *? (zero or more, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A*?) + DEFINE A AS val > 0 +); +ERROR: reluctant quantifiers are not yet supported +LINE 6: PATTERN (A*?) + ^ +-- Expected: ERROR: reluctant quantifiers are not yet supported +-- +? (one or more, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+?) + DEFINE A AS val > 0 +); +ERROR: reluctant quantifiers are not yet supported +LINE 6: PATTERN (A+?) + ^ +-- Expected: ERROR: reluctant quantifiers are not yet supported +-- ?? (zero or one, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A??) + DEFINE A AS val > 0 +); +ERROR: reluctant quantifiers are not yet supported +LINE 6: PATTERN (A??) + ^ +-- Expected: ERROR: reluctant quantifiers are not yet supported +-- {n,}? (n or more, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2,}?) + DEFINE A AS val > 0 +); +ERROR: reluctant quantifiers are not yet supported +LINE 6: PATTERN (A{2,}?) + ^ +-- Expected: ERROR: reluctant quantifiers are not yet supported +-- {n,m}? (n to m, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{1,3}?) 
+ DEFINE A AS val > 0 +); +ERROR: reluctant quantifiers are not yet supported +LINE 6: PATTERN (A{1,3}?) + ^ +-- Expected: ERROR: reluctant quantifiers are not yet supported +-- {n}? (exactly n, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2}?) + DEFINE A AS val > 0 +); +ERROR: reluctant quantifiers are not yet supported +LINE 6: PATTERN (A{2}?) + ^ +-- Expected: ERROR: reluctant quantifiers are not yet supported +-- {,m}? (up to m, reluctant) - COMPLETELY UNTESTED RULE! +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,3}?) + DEFINE A AS val > 0 +); +ERROR: reluctant quantifiers are not yet supported +LINE 6: PATTERN (A{,3}?) + ^ +-- Expected: ERROR: reluctant quantifiers are not yet supported +-- Invalid reluctant patterns (wrong token after quantifier) +-- {2}+ (should be {2}? not {2}+) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2}+) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "+" +LINE 6: PATTERN (A{2}+) + ^ +-- Expected: ERROR: syntax error at or near "+" +-- {2,}* (should be {2,}? not {2,}*) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2,}*) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "*" +LINE 6: PATTERN (A{2,}*) + ^ +-- Expected: ERROR: syntax error at or near "*" +-- {,3}* (should be {,3}? not {,3}*) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,3}*) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "*" +LINE 6: PATTERN (A{,3}*) + ^ +-- Expected: ERROR: syntax error at or near "*" +-- {1,3}+ (should be {1,3}? 
not {1,3}+) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{1,3}+) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "+" +LINE 6: PATTERN (A{1,3}+) + ^ +-- Expected: ERROR: syntax error at or near "+" +-- Boundary errors in reluctant quantifiers +-- {-1}? (negative bound) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{-1}?) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "-" +LINE 6: PATTERN (A{-1}?) + ^ +-- Expected: ERROR: syntax error at or near "-" +-- {2147483647}? (INT_MAX) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2147483647}?) + DEFINE A AS val > 0 +); +ERROR: quantifier bound must be between 1 and 2147483646 +LINE 6: PATTERN (A{2147483647}?) + ^ +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 +-- {-1,}? (negative lower bound) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{-1,}?) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "-" +LINE 6: PATTERN (A{-1,}?) + ^ +-- Expected: ERROR: syntax error at or near "-" +-- {2147483647,}? (INT_MAX lower bound) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2147483647,}?) + DEFINE A AS val > 0 +); +ERROR: quantifier bound must be between 0 and 2147483646 +LINE 6: PATTERN (A{2147483647,}?) + ^ +-- Expected: ERROR: quantifier bound must be between 0 and 2147483646 +-- {,0}? (zero upper bound) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,0}?) 
+ DEFINE A AS val > 0 +); +ERROR: quantifier bound must be between 1 and 2147483646 +LINE 6: PATTERN (A{,0}?) + ^ +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 +-- {,2147483647}? (INT_MAX upper bound) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,2147483647}?) + DEFINE A AS val > 0 +); +ERROR: quantifier bound must be between 1 and 2147483646 +LINE 6: PATTERN (A{,2147483647}?) + ^ +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 +-- {-1,3}? (negative lower in range) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{-1,3}?) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "-" +LINE 6: PATTERN (A{-1,3}?) + ^ +-- Expected: ERROR: syntax error at or near "-" +-- {1,2147483647}? (INT_MAX upper in range) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{1,2147483647}?) + DEFINE A AS val > 0 +); +ERROR: quantifier bounds must be between 0 and 2147483646 with max >= 1 +LINE 6: PATTERN (A{1,2147483647}?) + ^ +-- Expected: ERROR: quantifier bounds must be between 0 and 2147483646 with max >= 1 +-- {5,3}? (min > max) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{5,3}?) + DEFINE A AS val > 0 +); +ERROR: quantifier minimum bound must not exceed maximum +LINE 6: PATTERN (A{5,3}?) + ^ +-- Expected: ERROR: quantifier minimum bound must not exceed maximum +-- Token-separated reluctant quantifiers (space between quantifier and ?) +-- These may be tokenized differently by the lexer +-- * ? (token separated) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A* ?) 
+ DEFINE A AS val > 0 +); +ERROR: reluctant quantifiers are not yet supported +LINE 6: PATTERN (A* ?) + ^ +-- Expected: ERROR: reluctant quantifiers are not yet supported +-- + ? (token separated) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ ?) + DEFINE A AS val > 0 +); +ERROR: reluctant quantifiers are not yet supported +LINE 6: PATTERN (A+ ?) + ^ +-- Expected: ERROR: reluctant quantifiers are not yet supported +-- {2,} ? (token separated) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2,} ?) + DEFINE A AS val > 0 +); +ERROR: reluctant quantifiers are not yet supported +LINE 6: PATTERN (A{2,} ?) + ^ +-- Expected: ERROR: reluctant quantifiers are not yet supported +-- Invalid token combinations +-- * + (invalid combination) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A* +) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "+" +LINE 6: PATTERN (A* +) + ^ +-- Expected: ERROR: syntax error at or near "+" +-- + * (invalid combination) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ *) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "*" +LINE 6: PATTERN (A+ *) + ^ +-- Expected: ERROR: syntax error at or near "*" +-- ? ? (invalid combination) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A? ?) + DEFINE A AS val > 0 +); +ERROR: reluctant quantifiers are not yet supported +LINE 6: PATTERN (A? ?) 
+ ^ +-- Expected: ERROR: reluctant quantifiers are not yet supported +DROP TABLE rpr_reluctant; +-- Quantifier boundary conditions +CREATE TABLE rpr_bounds (id INT); +INSERT INTO rpr_bounds VALUES (1), (2); +-- min > max +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{5,3}) + DEFINE A AS id > 0 +); +ERROR: quantifier minimum bound must not exceed maximum +LINE 6: PATTERN (A{5,3}) + ^ +-- Expected: ERROR: quantifier minimum bound must not exceed maximum +-- Large bounds +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{1000,2000}) + DEFINE A AS id > 0 +); + count +------- + 0 + 0 +(2 rows) + +-- Very large bound +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{100000}) + DEFINE A AS id > 0 +); + count +------- + 0 + 0 +(2 rows) + +-- INT_MAX - 1 = 2147483646 (at limit) +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2147483646}) + DEFINE A AS id > 0 +); + count +------- + 0 + 0 +(2 rows) + +-- INT_MAX = 2147483647 (over limit) +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2147483647}) + DEFINE A AS id > 0 +); +ERROR: quantifier bound must be between 1 and 2147483646 +LINE 6: PATTERN (A{2147483647}) + ^ +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 +-- {n,} boundary errors +-- Negative lower bound in {n,} +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{-1,}) + DEFINE A AS id > 0 +); +ERROR: syntax error at or near "-" +LINE 6: PATTERN (A{-1,}) + ^ +-- Expected: ERROR: syntax error at or near "-" +-- INT_MAX in {n,} +SELECT COUNT(*) OVER 
w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2147483647,}) + DEFINE A AS id > 0 +); +ERROR: quantifier bound must be between 0 and 2147483646 +LINE 6: PATTERN (A{2147483647,}) + ^ +-- Expected: ERROR: quantifier bound must be between 0 and 2147483646 +-- {,m} boundary errors +-- Zero upper bound in {,m} +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,0}) + DEFINE A AS id > 0 +); +ERROR: quantifier bound must be between 1 and 2147483646 +LINE 6: PATTERN (A{,0}) + ^ +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 +-- INT_MAX in {,m} +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,2147483647}) + DEFINE A AS id > 0 +); +ERROR: quantifier bound must be between 1 and 2147483646 +LINE 6: PATTERN (A{,2147483647}) + ^ +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 +DROP TABLE rpr_bounds; +-- ============================================================ +-- Navigation Functions Tests (PREV / NEXT) +-- ============================================================ +CREATE TABLE rpr_nav (id INT, val INT); +INSERT INTO rpr_nav VALUES + (1, 10), (2, 20), (3, 15), (4, 25), (5, 30); +-- PREV function - reference previous row in pattern +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_nav +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE + A AS val > 0, + B AS val > PREV(val) +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 3 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +-- NEXT function - reference next row in pattern +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_nav +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ B) + DEFINE + A AS val < NEXT(val), + B AS val > 0 
+) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 3 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +-- Combined PREV and NEXT +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_nav +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B C) + DEFINE + A AS val > 0, + B AS val > PREV(val) AND val < NEXT(val), + C AS val > PREV(val) +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 15 | 3 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +DROP TABLE rpr_nav; +-- ============================================================ +-- SKIP TO / INITIAL Tests +-- ============================================================ +CREATE TABLE rpr_skip (id INT, val INT); +INSERT INTO rpr_skip VALUES + (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), + (6, 6), (7, 7), (8, 8); +-- SKIP TO NEXT ROW +-- SKIP TO NEXT ROW +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_skip +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE A AS val > 0, B AS val > 2, C AS val > 4 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 1 | 0 + 2 | 2 | 0 + 3 | 3 | 3 + 4 | 4 | 3 + 5 | 5 | 3 + 6 | 6 | 3 + 7 | 7 | 0 + 8 | 8 | 0 +(8 rows) + +-- SKIP PAST LAST ROW +-- SKIP PAST LAST ROW +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_skip +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS val > 0, B AS val > 2, C AS val > 4 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 1 | 0 + 2 | 2 | 0 + 3 | 3 | 3 + 4 | 4 | 0 + 5 | 5 | 0 + 6 | 6 | 3 + 7 | 7 | 0 + 8 | 8 | 0 +(8 rows) + +-- Default behavior (should be SKIP PAST LAST ROW) +-- No SKIP TO clause (default) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_skip +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B) + DEFINE A AS val > 0, B AS val > 1 +) 
+ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 1 | 2 + 2 | 2 | 0 + 3 | 3 | 2 + 4 | 4 | 0 + 5 | 5 | 2 + 6 | 6 | 0 + 7 | 7 | 2 + 8 | 8 | 0 +(8 rows) + +-- Compare default with explicit PAST LAST ROW +-- Results should be identical +WITH default_skip AS ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_skip + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B C) + DEFINE A AS val > 0, B AS val > 2, C AS val > 4 + ) +), +explicit_skip AS ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_skip + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS val > 0, B AS val > 2, C AS val > 4 + ) +) +SELECT 'default' as type, * FROM default_skip +UNION ALL +SELECT 'explicit' as type, * FROM explicit_skip +ORDER BY type, id; + type | id | val | cnt +----------+----+-----+----- + default | 1 | 1 | 0 + default | 2 | 2 | 0 + default | 3 | 3 | 3 + default | 4 | 4 | 0 + default | 5 | 5 | 0 + default | 6 | 6 | 3 + default | 7 | 7 | 0 + default | 8 | 8 | 0 + explicit | 1 | 1 | 0 + explicit | 2 | 2 | 0 + explicit | 3 | 3 | 3 + explicit | 4 | 4 | 0 + explicit | 5 | 5 | 0 + explicit | 6 | 6 | 3 + explicit | 7 | 7 | 0 + explicit | 8 | 8 | 0 +(16 rows) + +DROP TABLE rpr_skip; +-- INITIAL clause +CREATE TABLE rpr_init (id INT, val INT); +INSERT INTO rpr_init VALUES (1, 10), (2, 20), (3, 30), (4, 40); +-- Explicit INITIAL +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_init +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 4 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 +(4 rows) + +-- Implicit INITIAL (default) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_init +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt 
+----+-----+----- + 1 | 10 | 4 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 +(4 rows) + +DROP TABLE rpr_init; +-- SEEK +CREATE TABLE rpr_seek (id INT, val INT); +INSERT INTO rpr_seek VALUES (1, 10); +-- SEEK keyword +SELECT COUNT(*) OVER w +FROM rpr_seek +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + SEEK + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: SEEK is not supported +LINE 6: SEEK + ^ +HINT: Use INITIAL instead. +-- Expected: ERROR: SEEK is not supported +-- HINT: Use INITIAL instead. +DROP TABLE rpr_seek; +-- ============================================================ +-- Serialization/Deserialization Tests +-- ============================================================ +-- View creation and deparsing +CREATE TABLE rpr_serial (id INT, val INT); +INSERT INTO rpr_serial VALUES + (1, 10), (2, 20), (3, 15), (4, 25), (5, 30); +-- Simple pattern +CREATE VIEW rpr_serial_v1 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +-- Verify view works (tests deserialization) +SELECT * FROM rpr_serial_v1 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 5 + 2 | 20 | 0 + 3 | 15 | 0 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +-- Verify deparsing +SELECT pg_get_viewdef('rpr_serial_v1'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a+) + + DEFINE + + a AS (val > 0) ); +(1 row) + +DROP VIEW rpr_serial_v1; +-- Complex pattern with alternation +CREATE VIEW rpr_serial_v2 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ | B*) + DEFINE A AS val > 20, B 
AS val <= 20 +); +SELECT * FROM rpr_serial_v2 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 15 | 0 + 4 | 25 | 2 + 5 | 30 | 0 +(5 rows) + +SELECT pg_get_viewdef('rpr_serial_v2'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a+ | b*) + + DEFINE + + a AS (val > 20), + + b AS (val <= 20) ); +(1 row) + +DROP VIEW rpr_serial_v2; +-- Pattern with grouping and quantifiers +CREATE VIEW rpr_serial_v3 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B){2,5} | C*) + DEFINE + A AS val > 10, + B AS val > 20, + C AS val <= 10 +); +SELECT * FROM rpr_serial_v3 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 1 + 2 | 20 | 0 + 3 | 15 | 0 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +SELECT pg_get_viewdef('rpr_serial_v3'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a b){2,5} | c*) + + DEFINE + + a AS (val > 10), + + b AS (val > 20), + + c AS (val <= 10) ); +(1 row) + +DROP VIEW rpr_serial_v3; +-- All features combined +CREATE VIEW rpr_serial_v4 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (START (MID{1,3} | ALT+) FINISH) + DEFINE + START AS val > 5, + MID AS val BETWEEN 10 AND 25, + ALT AS val > 25, + FINISH AS val > 15 +); +SELECT * FROM rpr_serial_v4 ORDER BY id; + id | 
val | cnt +----+-----+----- + 1 | 10 | 5 + 2 | 20 | 4 + 3 | 15 | 3 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +SELECT pg_get_viewdef('rpr_serial_v4'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP TO NEXT ROW + + INITIAL + + PATTERN (start (mid{1,3} | alt+) finish) + + DEFINE + + start AS (val > 5), + + mid AS ((val >= 10) AND (val <= 25)), + + alt AS (val > 25), + + finish AS (val > 15) ); +(1 row) + +DROP VIEW rpr_serial_v4; +-- Additional quantifiers for deparsing coverage +-- ? quantifier (zero or one) +CREATE VIEW rpr_serial_v5 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B?) + DEFINE A AS val > 10, B AS val > 20 +); +SELECT * FROM rpr_serial_v5 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 1 + 3 | 15 | 2 + 4 | 25 | 0 + 5 | 30 | 1 +(5 rows) + +SELECT pg_get_viewdef('rpr_serial_v5'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a b?) 
+ + DEFINE + + a AS (val > 10), + + b AS (val > 20) ); +(1 row) + +DROP VIEW rpr_serial_v5; +-- {n,} quantifier (n or more) +CREATE VIEW rpr_serial_v6 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2,}) + DEFINE A AS val > 15 +); +SELECT * FROM rpr_serial_v6 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 15 | 0 + 4 | 25 | 2 + 5 | 30 | 0 +(5 rows) + +SELECT pg_get_viewdef('rpr_serial_v6'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{2,}) + + DEFINE + + a AS (val > 15) ); +(1 row) + +DROP VIEW rpr_serial_v6; +-- {n} quantifier (exactly n) +CREATE VIEW rpr_serial_v7 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{3}) + DEFINE A AS val > 0 +); +SELECT * FROM rpr_serial_v7 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 15 | 0 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +SELECT pg_get_viewdef('rpr_serial_v7'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{3}) + + DEFINE + + a AS (val > 0) ); +(1 row) + +DROP VIEW rpr_serial_v7; +-- Nested ALT pattern (tests deparse of complex nested structure) +CREATE VIEW rpr_serial_v8 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A+ 
B) | C) D | A B C) + DEFINE A AS val <= 15, B AS val <= 25, C AS val <= 30, D AS val > 30 +); +SELECT * FROM rpr_serial_v8 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 15 | 0 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +SELECT pg_get_viewdef('rpr_serial_v8'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (((a+ b) | c) d | a b c) + + DEFINE + + a AS (val <= 15), + + b AS (val <= 25), + + c AS (val <= 30), + + d AS (val > 30) ); +(1 row) + +DROP VIEW rpr_serial_v8; +DROP TABLE rpr_serial; +-- Materialized view (if supported) +CREATE TABLE rpr_mview (id INT, val INT); +INSERT INTO rpr_mview VALUES (1, 10), (2, 20), (3, 30); +CREATE MATERIALIZED VIEW rpr_mview_v1 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_mview +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +SELECT * FROM rpr_mview_v1 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 +(3 rows) + +SELECT pg_get_viewdef('rpr_mview_v1'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_mview + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a+) + + DEFINE + + a AS (val > 0) ); +(1 row) + +-- Refresh test +REFRESH MATERIALIZED VIEW rpr_mview_v1; +SELECT * FROM rpr_mview_v1 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 +(3 rows) + +DROP MATERIALIZED VIEW rpr_mview_v1; +DROP TABLE rpr_mview; +-- Prepared statements (tests outfuncs.c / readfuncs.c) +CREATE TABLE rpr_prep 
(id INT, val INT); +INSERT INTO rpr_prep VALUES (1, 10), (2, 20), (3, 30); +-- Simple prepared statement +PREPARE rpr_prep_simple AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_prep +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; +EXECUTE rpr_prep_simple; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 +(3 rows) + +EXECUTE rpr_prep_simple; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 +(3 rows) + +DEALLOCATE rpr_prep_simple; +-- Prepared statement with parameters +PREPARE rpr_prep_param(int) AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_prep +WHERE id <= $1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 10 +) +ORDER BY id; +EXECUTE rpr_prep_param(2); + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 1 +(2 rows) + +EXECUTE rpr_prep_param(3); + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 2 + 3 | 30 | 0 +(3 rows) + +DEALLOCATE rpr_prep_param; +-- Complex prepared statement +PREPARE rpr_prep_complex AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_prep +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B){1,2} | C+) + DEFINE + A AS val > 5, + B AS val > 15, + C AS val <= 15 +) +ORDER BY id; +EXECUTE rpr_prep_complex; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 2 + 3 | 30 | 0 +(3 rows) + +EXECUTE rpr_prep_complex; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 2 + 3 | 30 | 0 +(3 rows) + +DEALLOCATE rpr_prep_complex; +DROP TABLE rpr_prep; +-- CTE and Subquery (tests copyfuncs.c) +CREATE TABLE rpr_copy (id INT, val INT); +INSERT INTO rpr_copy VALUES (1, 10), (2, 20), (3, 30), (4, 40); +-- Simple CTE +WITH rpr_cte AS ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_copy + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW 
AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) +SELECT * FROM rpr_cte ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 4 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 +(4 rows) + +-- CTE with multiple references (forces node copy) +WITH rpr_cte AS ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_copy + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 15 + ) +) +SELECT c1.id, c1.cnt as cnt1, c2.cnt as cnt2 +FROM rpr_cte c1 +JOIN rpr_cte c2 ON c1.id = c2.id +ORDER BY c1.id; + id | cnt1 | cnt2 +----+------+------ + 1 | 0 | 0 + 2 | 3 | 3 + 3 | 0 | 0 + 4 | 0 | 0 +(4 rows) + +-- Subquery in FROM clause +SELECT * +FROM ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_copy + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B?) + DEFINE A AS val > 10, B AS val > 20 + ) +) sub +WHERE cnt > 0 +ORDER BY id; + id | val | cnt +----+-----+----- + 2 | 20 | 2 + 4 | 40 | 1 +(2 rows) + +-- Nested subqueries +SELECT * +FROM ( + SELECT * + FROM ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_copy + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val >= 10 + ) + ) inner_sub + WHERE cnt > 0 +) outer_sub +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 4 +(1 row) + +DROP TABLE rpr_copy; +-- DISTINCT and set operations (tests equalfuncs.c) +CREATE TABLE rpr_equal (id INT, val INT); +INSERT INTO rpr_equal VALUES (1, 10), (2, 20), (3, 10), (4, 20); +-- DISTINCT with RPR +SELECT DISTINCT cnt +FROM ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_equal + WINDOW w AS ( + ORDER BY val + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub +ORDER BY cnt; + cnt +----- + 1 + 2 + 3 + 4 +(4 rows) + +-- UNION with RPR in both sides +SELECT id, val, cnt FROM ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_equal + WHERE val = 10
+ WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub1 +UNION +SELECT id, val, cnt FROM ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_equal + WHERE val = 20 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub2 +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 2 + 3 | 10 | 0 + 4 | 20 | 0 +(4 rows) + +-- UNION ALL +SELECT id, cnt FROM ( + SELECT id, COUNT(*) OVER w as cnt + FROM rpr_equal + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 10 + ) +) sub +UNION ALL +SELECT id, cnt FROM ( + SELECT id, COUNT(*) OVER w as cnt + FROM rpr_equal + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (B+) + DEFINE B AS val <= 10 + ) +) sub +ORDER BY id, cnt; + id | cnt +----+----- + 1 | 0 + 1 | 1 + 2 | 0 + 2 | 1 + 3 | 0 + 3 | 1 + 4 | 0 + 4 | 1 +(8 rows) + +-- INTERSECT +SELECT id, cnt FROM ( + SELECT id, COUNT(*) OVER w as cnt + FROM rpr_equal + WHERE id <= 3 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub1 +INTERSECT +SELECT id, cnt FROM ( + SELECT id, COUNT(*) OVER w as cnt + FROM rpr_equal + WHERE id >= 2 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub2 +ORDER BY id; + id | cnt +----+----- + 3 | 0 +(1 row) + +DROP TABLE rpr_equal; +-- View with multiple window definitions +CREATE TABLE rpr_multiwin (id INT, val INT); +INSERT INTO rpr_multiwin VALUES (1, 10), (2, 20), (3, 30); +CREATE VIEW rpr_multiwin_v AS +SELECT + id, + val, + COUNT(*) OVER w1 as cnt1, + COUNT(*) OVER w2 as cnt2 +FROM rpr_multiwin +WINDOW + w1 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 15 + ), + w2 AS ( + 
ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (B*) + DEFINE B AS val <= 15 + ); +SELECT * FROM rpr_multiwin_v ORDER BY id; + id | val | cnt1 | cnt2 +----+-----+------+------ + 1 | 10 | 0 | 1 + 2 | 20 | 2 | 0 + 3 | 30 | 0 | 0 +(3 rows) + +SELECT pg_get_viewdef('rpr_multiwin_v'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------------------- + SELECT id, + + val, + + count(*) OVER w1 AS cnt1, + + count(*) OVER w2 AS cnt2 + + FROM rpr_multiwin + + WINDOW w1 AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a+) + + DEFINE + + a AS (val > 15) ), w2 AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (b*) + + DEFINE + + b AS (val <= 15) ); +(1 row) + +DROP VIEW rpr_multiwin_v; +DROP TABLE rpr_multiwin; +-- ============================================================ +-- Error Cases Tests +-- ============================================================ +DROP TABLE IF EXISTS rpr_err; +CREATE TABLE rpr_err (id INT, val INT); +INSERT INTO rpr_err VALUES (1, 10), (2, 20); +-- Syntax errors +-- Invalid quantifier syntax +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+!) + DEFINE A AS val > 0 +); +ERROR: unsupported quantifier "+!" +LINE 6: PATTERN (A+!) + ^ +HINT: Valid quantifiers are: *, +, ?, *?, +?, ??, {n}, {n,}, {,m}, {n,m} and their reluctant versions. 
+-- Expected: Syntax error +-- Unmatched parentheses +SET client_min_messages = NOTICE; +DO $$ +BEGIN + EXECUTE 'SELECT COUNT(*) OVER w FROM rpr_err WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A B) DEFINE A AS val > 0, B AS val > 10)'; + RAISE NOTICE 'Unmatched parentheses: UNEXPECTED SUCCESS'; +EXCEPTION + WHEN syntax_error THEN + RAISE NOTICE 'Unmatched parentheses: EXPECTED ERROR - %', SQLERRM; + WHEN OTHERS THEN + RAISE NOTICE 'Unmatched parentheses: UNEXPECTED ERROR - %', SQLERRM; +END $$; +NOTICE: Unmatched parentheses: EXPECTED ERROR - syntax error at or near "AS" +SET client_min_messages = WARNING; +-- Empty DEFINE +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE +); +ERROR: syntax error at or near ")" +LINE 8: ); + ^ +-- Expected: Syntax error +-- Empty PATTERN +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN () + DEFINE A AS val > 0 +); +ERROR: syntax error at or near ")" +LINE 6: PATTERN () + ^ +-- Expected: Syntax error +-- DEFINE without PATTERN (PATTERN and DEFINE must be used together) +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "DEFINE" +LINE 6: DEFINE A AS val > 0 + ^ +-- Expected: Syntax error +-- Qualified column references (NOT SUPPORTED) +-- Pattern variables in DEFINE clause cannot use qualified references (A.price) +-- This gives a confusing error about missing FROM-clause entry +-- Qualified reference in DEFINE clause +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS A.val > 0 +); +ERROR: missing FROM-clause entry for table "a" +LINE 7: DEFINE A AS A.val > 0 + ^ +-- Expected: ERROR: missing FROM-clause entry 
for table "a" +-- Semantic errors +-- Undefined column in DEFINE +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS nonexistent_column > 0 +); +ERROR: column "nonexistent_column" does not exist +LINE 7: DEFINE A AS nonexistent_column > 0 + ^ +-- Expected: ERROR: column "nonexistent_column" does not exist +-- Type mismatch +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 'string' +); +ERROR: invalid input syntax for type integer: "string" +LINE 7: DEFINE A AS val > 'string' + ^ +-- Expected: ERROR: invalid input syntax for type integer: "string" +-- Aggregate function in DEFINE (if not allowed) +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS COUNT(*) > 0 +); +ERROR: aggregate functions are not allowed in DEFINE +LINE 7: DEFINE A AS COUNT(*) > 0 + ^ +-- Expected: ERROR or works depending on implementation +-- Subquery in DEFINE (NOT SUPPORTED) +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > (SELECT max(val) FROM rpr_err) +); +ERROR: cannot use subquery in DEFINE expression +LINE 7: DEFINE A AS val > (SELECT max(val) FROM rpr_err) + ^ +-- Expected: ERROR: cannot use subquery in DEFINE expression +-- Edge cases +-- Pattern variable not used (should work, extra vars ignored) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0, B AS val > 5, C AS val > 10 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 +(2 rows) + +DROP TABLE rpr_err; +-- NULL handling +CREATE TABLE rpr_null (id INT, val INT); +INSERT INTO rpr_null VALUES (1, 
10), (2, NULL), (3, 30); +-- NULL in DEFINE expression +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_null +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 15 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | | 0 + 3 | 30 | 1 +(3 rows) + +-- IS NULL in DEFINE +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_null +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (N+) + DEFINE N AS val IS NULL +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | | 1 + 3 | 30 | 0 +(3 rows) + +-- IS NOT NULL in DEFINE +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_null +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (NN+) + DEFINE NN AS val IS NOT NULL +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 1 + 2 | | 0 + 3 | 30 | 1 +(3 rows) + +DROP TABLE rpr_null; +-- ============================================================ +-- Pattern Optimization Tests +-- ============================================================ +-- Tests for pattern optimization in optimizer/plan/rpr.c +-- Use EXPLAIN to verify optimized pattern (shown as "Pattern: ...") +CREATE TABLE rpr_plan (id INT, val INT); +INSERT INTO rpr_plan VALUES + (1, 10), (2, 20), (3, 30), (4, 40), (5, 50), + (6, 60), (7, 70), (8, 80), (9, 90), (10, 100); +-- Consecutive VAR merge: A A A -> a{3} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A A A) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{3} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Consecutive VAR merge: A{2} A{3} -> a{5} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan 
+WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2} A{3}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{5} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Consecutive VAR merge: A+ A* -> a+ +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ A*) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Consecutive VAR merge: A A+ -> a{2,} +-- Tests line 251: child->max == RPR_QUANTITY_INF branch in mergeConsecutiveVars +-- prev: A{1,1} (finite), child: A+ (infinite) triggers line 251 evaluation +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A A+) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2,}" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Consecutive GROUP merge with finite quantifiers: ((A B){5}) ((A B){10}) -> merged +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B){5}) ((A B){10})) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b){15} + -> 
Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Consecutive GROUP merge with unbounded: (A B)+ (A B)+ -> (a b){2,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+ (A B)+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){2,}" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Consecutive GROUP merge: (A B){2} (A B)+ -> (a b){3,} +-- Tests line 325: child->max == RPR_QUANTITY_INF branch in mergeConsecutiveGroups +-- prev: (A B){2,2} (finite), child: (A B)+ (infinite) triggers line 325 evaluation +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B){2} (A B)+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){3,}" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- PREFIX merge: A B (A B)+ -> (a b){2,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (A B)+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){2,}" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- PREFIX and SUFFIX merge: A B (A B)+ A B -> (a b){3,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + 
PATTERN (A B (A B)+ A B) DEFINE A AS val <= 40, B AS val > 40); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){3,}" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Flatten nested: A ((B) (C)) -> a b c +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B) (C))) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- ALT flatten: (A | (B | C))+ -> (a | b | c)+ +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | (B | C))+) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b | c)+ + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- ALT deduplicate: (A | B | A) -> (a | b) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B | A)+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b)+ + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Quantifier multiply: (A{2}){3} -> a{6} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan 
+WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}){3}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{6} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Quantifier multiply with child range: (A{2,3}){3} -> a{6,9} +-- outer exact, child range - optimization applies +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2,3}){3}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{6,9} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Quantifier NO multiply: (A{2}){2,3} stays as (a{2}){2,3} +-- outer range - gaps would occur (4,6 not 4,5,6), no optimization +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}){2,3}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2}){2,3} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Quantifier NO multiply: (A{2}){2,} stays as (a{2}){2,} +-- outer unbounded - gaps would occur (4,6,8,... 
not 4,5,6,...), no optimization +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}){2,}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2}){2,} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Quantifier multiply: (A){2,} -> a{2,} +-- child exact 1 - no gaps, optimization applies +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A){2,}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2,}" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Quantifier multiply: (A)+ -> a+ +-- child exact 1 - no gaps, optimization applies +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A)+) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Quantifier NO multiply: (A{2}){3,5} stays as (a{2}){3,5} +-- outer range, child exact > 1 - gaps would occur (6,8,10 not 6,7,8,9,10) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}){3,5}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN 
CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2}){3,5} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Quantifier NO multiply: (A{2,3}){2,3} stays as (a{2,3}){2,3} +-- outer range, child range - gaps possible (e.g., (A{4,5}){2,3} misses 11) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2,3}){2,3}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2,3}){2,3} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Nested unbounded: (A*)* -> a* +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A*)*) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a*" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Nested unbounded: (A+)* -> a* +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+)*) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a*" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Nested unbounded: (A+)+ -> a+ +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+)+) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY 
id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Unwrap GROUP{1,1}: (A) -> a +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A)) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Unwrap GROUP{1,1}: (A B) -> a b +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Combined optimization: A A (B B)+ B B C C C -> a{2} (b{2}){2,} c{3} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A A (B B)+ B B C C C) + DEFINE A AS val <= 20, B AS val > 20 AND val <= 70, C AS val > 70); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2} (b{2}){2,} c{3} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Consecutive GROUP merge with unbounded: (A+) (A+) -> a{2,} +-- Tests mergeConsecutiveGroups with child->max == INF +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+) (A+)) DEFINE A AS val > 0); + QUERY PLAN 
+------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2,}" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Consecutive GROUP merge finite: (A{10}){20} -> a{200} +-- Tests mergeConsecutiveGroups with both finite +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{10}){20}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{200} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Different GROUP prevents merge: (A B){2} (C D){3} +-- Tests mergeConsecutiveGroups flush previous +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B){2} (C D){3}) + DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, + C AS val > 50 AND val <= 75, D AS val > 75); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b){2} (c d){3} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Different children count prevents merge: (A B)+ (A B C)+ +-- Tests rprPatternChildrenEqual length check +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+ (A B C)+) + DEFINE A AS val <= 33, B AS val > 33 AND val <= 66, C AS val > 66); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b')+" (a b 
c)+ + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- PREFIX only merge: A B (A B)+ -> (a b){2,} +-- Tests mergeGroupPrefixSuffix: absorb preceding elements into GROUP min +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (A B)+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){2,}" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- SUFFIX only merge: (A B)+ A B -> (a b){2,} +-- Tests mergeGroupPrefixSuffix: absorb following elements into GROUP min +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+ A B) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){2,}" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Multiple SUFFIX absorption with skipUntil: (A B)+ A B A B C +-- Tests mergeGroupPrefixSuffix: skip absorbed suffix elements +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+ A B A B C) + DEFINE A AS val <= 50, B AS val > 50 AND val <= 75, C AS val > 75); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){3,}" c + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- PREFIX merge with remaining prefix: A B C D (C D)+ +-- Tests mergeGroupPrefixSuffix: trimmed list reconstruction 
+EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B C D (C D)+) + DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, + C AS val > 50 AND val <= 75, D AS val > 75); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b (c d){2,} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- PREFIX merge with quantifiers: A B* (A B*)+ -> (a b*){2,} +-- Tests mergeGroupPrefixSuffix: quantifier comparison in rprPatternEqual +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B* (A B*)+) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b*){2,} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- PREFIX merge with multiple quantifiers: A+ B* C? (A+ B* C?)+ -> (a+ b* c?){2,} +-- Tests mergeGroupPrefixSuffix: complex quantifier patterns +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ B* C? 
(A+ B* C?)+) + DEFINE A AS val <= 30, B AS val > 30 AND val <= 60, C AS val > 60); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a+" b* c?){2,} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- SUFFIX merge with quantifiers: (A B*)+ A B* -> (a b*){2,} +-- Tests mergeGroupPrefixSuffix: suffix with quantifiers +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B*)+ A B*) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b*){2,} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Unwrap GROUP{1,1}: ((A | B | C)) -> (a | b | c) +-- Tests tryUnwrapGroup removing redundant outer GROUP +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B | C)) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b | c) + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- ============================================================ +-- Absorption Flag Display Tests +-- ============================================================ +-- Tests absorption marker display in EXPLAIN output +-- Markers: ' = branch element, " = judgment point +-- Files: explain.c (append_rpr_quantifier, deparse_rpr_pattern) +-- Simple VAR: A+ -> a+" (judgment point) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY 
id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- GROUP unbounded: (A B)+ -> (a' b')+" (branch + judgment) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A B)+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b')+" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- ALT both absorbable: A+ | B+ -> (a+" | b+") +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+ | B+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a+" | b+") + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- ALT one absorbable: A+ | B -> (a+" | b) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+ | B) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a+" | b) + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan 
+(6 rows) + +-- Sequence with absorbable start: A+ B -> a+" b +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+ B) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Complex nested: ((A+ B) | C) D | A B C - deeply nested ALT +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (((A+ B) | C) D | A B C) + DEFINE A AS val <= 30, B AS val <= 60, C AS val <= 80, D AS val > 80); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a+" b | c) d | a b c) + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Nested unbounded: (A+ | B)+ -> (a+" | b)+ (first iteration absorbable) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A+ | B)+) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a+" | b)+ + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- ALT inside unbounded GROUP: (A+ B | A B)* -> (a+" b | a b)* (first iteration absorbable) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW 
PATTERN ((A+ B | A B)*) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a+" b | a b)* + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Non-absorbable (unbounded not at start): A B+ -> a b+ (no markers) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A B+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Non-absorbable (no unbounded branch): (A | B){2,} -> (a | b){2,} (no markers) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A | B){2,}) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b){2,} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Non-absorbable (SKIP TO NEXT ROW): A+ -> a+ (no markers) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW PATTERN (A+) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+ + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- 
Non-absorbable (limited frame): A+ -> a+ (no markers) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND 10 FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+) DEFINE A AS val > 0); + QUERY PLAN +---------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND '10'::bigint FOLLOWING) + Pattern: a+ + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- ============================================================ +-- Absorption Analysis Tests +-- ============================================================ +-- Tests context absorption optimization (O(n^2) -> O(n)) +-- Files: rpr.c (computeAbsorbability) +-- Simple Absorbable Pattern: A+ B +-- Pattern starts with unbounded VAR +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 6 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Absorbable GROUP Pattern: (A B)+ C +-- Pattern starts with unbounded GROUP +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C) + DEFINE A AS val <= 30, B AS val > 30 AND val <= 60, C AS val > 60 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Non-Absorbable: Unbounded Not at Start +-- Pattern: A B+ (unbounded not at start) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW 
AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 6 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- ALT with Absorbable Branches +-- Pattern: (A+ | B+) C - both branches absorbable +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A+ | B+) C) + DEFINE A AS val <= 30, B AS val > 30 AND val <= 60, C AS val > 60 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 4 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- ALT with Mixed Branches +-- Pattern: (A+ | B C) - only first branch absorbable +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A+ | B C)+) + DEFINE A AS val <= 30, B AS val > 30 AND val <= 60, C AS val > 60 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 2 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Non-Absorbable: ALT Inside GROUP +-- Pattern: (A | B){2,} - ALT inside unbounded GROUP +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B){2,}) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 10 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Non-Absorbable: Nested Unbounded +-- Pattern: ((A B)+ C)+ - nested GROUP structure 
+SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B)+ C)+) + DEFINE A AS val <= 30, B AS val > 30 AND val <= 60, C AS val > 60 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Non-Absorbable: Unbounded Element Inside GROUP +-- Pattern: (A B+){2,} - unbounded inside GROUP +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B+){2,}) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Runtime Conditions: SKIP TO NEXT ROW +-- Absorption disabled with SKIP TO NEXT ROW +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 6 + 2 | 20 | 5 + 3 | 30 | 4 + 4 | 40 | 3 + 5 | 50 | 2 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Runtime Conditions: Limited Frame +-- Absorption disabled with limited frame end +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 6 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- 
============================================================ +-- Edge Case Tests +-- ============================================================ +-- Tests boundary conditions and complex scenarios +-- Empty Match Prevention +-- Pattern that could match empty: A* +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A*) + DEFINE A AS val > 1000 -- Never matches +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- All Rows Match +-- Pattern where every row matches +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val >= 0 -- Always true +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 10 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Large Quantifiers +-- Pattern: A{100} (large exact quantifier) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{100}) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Pattern: A{10,20} (large range quantifier) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{10,20}) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 10 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Complex Multi-Level Nesting +-- 
Pattern: (((A B) | C)+ D)+ +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((((A B) | C)+ D)+) + DEFINE A AS val <= 20, B AS val > 20 AND val <= 40, + C AS val > 40 AND val <= 60, D AS val > 60 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 3 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Long Alternation Chain +-- Pattern: A | B | C | D | E (5-way ALT) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A | B | C | D | E) + DEFINE A AS val = 10, B AS val = 30, C AS val = 50, + D AS val = 70, E AS val = 90 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 1 + 2 | 20 | 0 + 3 | 30 | 1 + 4 | 40 | 0 + 5 | 50 | 1 + 6 | 60 | 0 + 7 | 70 | 1 + 8 | 80 | 0 + 9 | 90 | 1 + 10 | 100 | 0 +(10 rows) + +-- Long Sequence +-- Pattern: A B C D E F G H (8-element SEQ) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B C D E F G H) + DEFINE A AS val >= 10, B AS val >= 20, C AS val >= 30, + D AS val >= 40, E AS val >= 50, F AS val >= 60, + G AS val >= 70, H AS val >= 80 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 8 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Interleaved Quantifiers +-- Pattern: A{2} B+ C{3,5} D* E{1,} +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2} B+ C{3,5} D* E{1,}) + DEFINE A AS val > 0, B AS val > 0, C AS val > 0, + D AS val > 0, E AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 10 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 
0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- ============================================================ +-- Optimization Fallback Tests +-- ============================================================ +-- Tests for optimization edge cases and fallback behavior +CREATE TABLE rpr_fallback (id INT, val INT); +INSERT INTO rpr_fallback VALUES (1, 10), (2, 20); +-- Test: min quantifier overflow causes optimization fallback (min == max case) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_fallback +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2000000000}){2}) + DEFINE A AS val > 0 +); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2000000000}){2} + -> Sort + Sort Key: id + -> Seq Scan on rpr_fallback +(6 rows) + +-- Expected: Fallback - pattern not merged due to min overflow (4000000000 > INT32_MAX) +-- Test: max-only quantifier overflow causes optimization fallback +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_fallback +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{1,2000000000}){2}) + DEFINE A AS val > 0 +); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{1,2000000000}){2} + -> Sort + Sort Key: id + -> Seq Scan on rpr_fallback +(6 rows) + +-- Expected: Fallback - min OK (2*1=2), but max overflow (2*2000000000 > INT32_MAX) +-- Test: max quantifier exceeds valid range (2147483647 = INT_MAX, limit is 2147483646) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_fallback +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2000000000,2147483647}){2}) + DEFINE A AS val > 0 +); +ERROR: 
quantifier bounds must be between 0 and 2147483646 with max >= 1 +LINE 6: PATTERN ((A{2000000000,2147483647}){2}) + ^ +-- Expected: ERROR at parse time before optimization +-- Test: nested unbounded with large min causes overflow fallback +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_fallback +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2000000000,}){2000000000,}) + DEFINE A AS val > 0 +); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2000000000,}){2000000000,} + -> Sort + Sort Key: id + -> Seq Scan on rpr_fallback +(6 rows) + +-- Expected: Fallback - min overflow (2000000000 * 2000000000 > INT32_MAX) +-- Test: prefix mismatch causes optimization fallback +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_fallback +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (C D)+) + DEFINE A AS val > 0, B AS val > 5, C AS val > 10, D AS val > 15 +); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b (c d)+ + -> Sort + Sort Key: id + -> Seq Scan on rpr_fallback +(6 rows) + +-- Expected: Fallback - prefix elements don't match GROUP content +DROP TABLE rpr_fallback; +-- ============================================================ +-- Planner Integration Tests +-- ============================================================ +-- Tests full planning pipeline and WindowAgg plan node creation +-- Files: planner.c, createplan.c +CREATE TABLE rpr_planner (id INT, category VARCHAR(10), val INT); +INSERT INTO rpr_planner VALUES + (1, 'A', 10), (2, 'A', 20), (3, 'A', 30), + (4, 'B', 40), (5, 'B', 50), (6, 'B', 60), + (7, 'C', 70), (8, 'C', 80), (9, 'C', 90); +-- Multiple Window Functions 
in Same Query +SELECT id, category, val, + COUNT(*) OVER w1 as cnt1, + COUNT(*) OVER w2 as cnt2 +FROM rpr_planner +WINDOW w1 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +), +w2 AS ( + PARTITION BY category + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (B+) + DEFINE B AS val >= 40 +) +ORDER BY id; + id | category | val | cnt1 | cnt2 +----+----------+-----+------+------ + 1 | A | 10 | 9 | 0 + 2 | A | 20 | 0 | 0 + 3 | A | 30 | 0 | 0 + 4 | B | 40 | 0 | 3 + 5 | B | 50 | 0 | 0 + 6 | B | 60 | 0 | 0 + 7 | C | 70 | 0 | 3 + 8 | C | 80 | 0 | 0 + 9 | C | 90 | 0 | 0 +(9 rows) + +-- Window Function with PARTITION BY +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_planner +WINDOW w AS ( + PARTITION BY category + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY category, id; + id | category | val | cnt +----+----------+-----+----- + 1 | A | 10 | 3 + 2 | A | 20 | 0 + 3 | A | 30 | 0 + 4 | B | 40 | 3 + 5 | B | 50 | 0 + 6 | B | 60 | 0 + 7 | C | 70 | 3 + 8 | C | 80 | 0 + 9 | C | 90 | 0 +(9 rows) + +-- Window Function with Complex ORDER BY +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_planner +WINDOW w AS ( + ORDER BY category DESC, val ASC + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY category DESC, val ASC; + id | category | val | cnt +----+----------+-----+----- + 7 | C | 70 | 9 + 8 | C | 80 | 0 + 9 | C | 90 | 0 + 4 | B | 40 | 0 + 5 | B | 50 | 0 + 6 | B | 60 | 0 + 1 | A | 10 | 0 + 2 | A | 20 | 0 + 3 | A | 30 | 0 +(9 rows) + +-- Named Window Reference +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_planner +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | category | val | cnt +----+----------+-----+----- + 1 | A | 10 | 9 + 2 | A | 20 | 0 + 3 | A 
| 30 | 0 + 4 | B | 40 | 0 + 5 | B | 50 | 0 + 6 | B | 60 | 0 + 7 | C | 70 | 0 + 8 | C | 80 | 0 + 9 | C | 90 | 0 +(9 rows) + +-- Inline Window Definition +SELECT id, category, val, + COUNT(*) OVER ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) as cnt +FROM rpr_planner +ORDER BY id; + id | category | val | cnt +----+----------+-----+----- + 1 | A | 10 | 9 + 2 | A | 20 | 0 + 3 | A | 30 | 0 + 4 | B | 40 | 0 + 5 | B | 50 | 0 + 6 | B | 60 | 0 + 7 | C | 70 | 0 + 8 | C | 80 | 0 + 9 | C | 90 | 0 +(9 rows) + +-- Window with Aggregate Functions +SELECT category, + COUNT(*) OVER w as window_cnt, + COUNT(*) as agg_cnt +FROM rpr_planner +WINDOW w AS ( + PARTITION BY category + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +GROUP BY category +ORDER BY category; +ERROR: syntax error at or near "GROUP" +LINE 12: GROUP BY category + ^ +-- Expected: ERROR (GROUP BY with window RPR not supported) +-- ============================================================ +-- Subquery and CTE Tests +-- Files: planner.c, prepjointree.c +-- ============================================================ +-- Tests RPR with subqueries and CTEs +-- RPR in Subquery (FROM clause) +SELECT * FROM ( + SELECT id, category, val, + COUNT(*) OVER w as cnt + FROM rpr_planner + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub +WHERE cnt > 5 +ORDER BY id; + id | category | val | cnt +----+----------+-----+----- + 1 | A | 10 | 9 +(1 row) + +-- RPR with Subquery in WHERE +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_planner +WHERE val > (SELECT AVG(val) FROM rpr_planner) +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 50 +) +ORDER BY id; + id | category | val | cnt +----+----------+-----+----- + 6 | B | 60 | 4 + 7 | C | 70 | 0 + 8 | C | 80 | 0 + 
9 | C | 90 | 0 +(4 rows) + +-- CTE with RPR +WITH rpr_cte AS ( + SELECT id, category, val, + COUNT(*) OVER w as cnt + FROM rpr_planner + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) +SELECT * FROM rpr_cte WHERE cnt > 5 ORDER BY id; + id | category | val | cnt +----+----------+-----+----- + 1 | A | 10 | 9 +(1 row) + +-- Multiple CTE References +WITH rpr_cte AS ( + SELECT id, category, val, + COUNT(*) OVER w as cnt + FROM rpr_planner + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) +SELECT c1.id, c1.cnt, c2.cnt as cnt2 +FROM rpr_cte c1 +JOIN rpr_cte c2 ON c1.id = c2.id +ORDER BY c1.id; + id | cnt | cnt2 +----+-----+------ + 1 | 9 | 9 + 2 | 0 | 0 + 3 | 0 | 0 + 4 | 0 | 0 + 5 | 0 | 0 + 6 | 0 | 0 + 7 | 0 | 0 + 8 | 0 | 0 + 9 | 0 | 0 +(9 rows) + +-- Nested CTEs +WITH cte1 AS ( + SELECT id, category, val FROM rpr_planner WHERE val > 30 +), +cte2 AS ( + SELECT id, category, val, + COUNT(*) OVER w as cnt + FROM cte1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) +SELECT * FROM cte2 ORDER BY id; + id | category | val | cnt +----+----------+-----+----- + 4 | B | 40 | 6 + 5 | B | 50 | 0 + 6 | B | 60 | 0 + 7 | C | 70 | 0 + 8 | C | 80 | 0 + 9 | C | 90 | 0 +(6 rows) + +-- ============================================================ +-- JOIN Tests +-- Files: prepjointree.c, setrefs.c +-- ============================================================ +-- Tests RPR with JOINs and multiple table references +CREATE TABLE rpr_join1 (id INT, val1 INT); +CREATE TABLE rpr_join2 (id INT, val2 INT); +INSERT INTO rpr_join1 VALUES (1, 10), (2, 20), (3, 30), (4, 40), (5, 50); +INSERT INTO rpr_join2 VALUES (1, 100), (2, 200), (3, 300), (4, 400), (5, 500); +-- RPR After INNER JOIN +SELECT t1.id, t1.val1, t2.val2, + COUNT(*) OVER w as cnt +FROM rpr_join1 t1 +INNER JOIN 
rpr_join2 t2 ON t1.id = t2.id +WINDOW w AS ( + ORDER BY t1.id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val1 + val2 > 100 +) +ORDER BY t1.id; + id | val1 | val2 | cnt +----+------+------+----- + 1 | 10 | 100 | 5 + 2 | 20 | 200 | 0 + 3 | 30 | 300 | 0 + 4 | 40 | 400 | 0 + 5 | 50 | 500 | 0 +(5 rows) + +-- RPR After LEFT JOIN +SELECT t1.id, t1.val1, t2.val2, + COUNT(*) OVER w as cnt +FROM rpr_join1 t1 +LEFT JOIN rpr_join2 t2 ON t1.id = t2.id +WINDOW w AS ( + ORDER BY t1.id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val1 > 0 +) +ORDER BY t1.id; + id | val1 | val2 | cnt +----+------+------+----- + 1 | 10 | 100 | 5 + 2 | 20 | 200 | 0 + 3 | 30 | 300 | 0 + 4 | 40 | 400 | 0 + 5 | 50 | 500 | 0 +(5 rows) + +-- RPR with Multiple Tables in DEFINE +SELECT t1.id, t1.val1, t2.val2, + COUNT(*) OVER w as cnt +FROM rpr_join1 t1 +INNER JOIN rpr_join2 t2 ON t1.id = t2.id +WINDOW w AS ( + ORDER BY t1.id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ B) + DEFINE A AS t1.val1 > 20, + B AS t2.val2 > 200 +) +ORDER BY t1.id; + id | val1 | val2 | cnt +----+------+------+----- + 1 | 10 | 100 | 0 + 2 | 20 | 200 | 0 + 3 | 30 | 300 | 3 + 4 | 40 | 400 | 0 + 5 | 50 | 500 | 0 +(5 rows) + +-- RPR After Cross Join +SELECT t1.id as id1, t2.id as id2, t1.val1, t2.val2, + COUNT(*) OVER w as cnt +FROM rpr_join1 t1 +CROSS JOIN rpr_join2 t2 +WHERE t1.id <= 2 AND t2.id <= 2 +WINDOW w AS ( + ORDER BY t1.id, t2.id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val1 + val2 > 0 +) +ORDER BY t1.id, t2.id; + id1 | id2 | val1 | val2 | cnt +-----+-----+------+------+----- + 1 | 1 | 10 | 100 | 4 + 1 | 2 | 10 | 200 | 0 + 2 | 1 | 20 | 100 | 0 + 2 | 2 | 20 | 200 | 0 +(4 rows) + +-- Self-Join with RPR +SELECT a.id, a.val1, b.val1 as val1_next, + COUNT(*) OVER w as cnt +FROM rpr_join1 a +INNER JOIN rpr_join1 b ON a.id + 1 = b.id +WINDOW w AS ( + ORDER BY a.id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED 
FOLLOWING + PATTERN (X+) + DEFINE X AS a.val1 < b.val1 +) +ORDER BY a.id; + id | val1 | val1_next | cnt +----+------+-----------+----- + 1 | 10 | 20 | 4 + 2 | 20 | 30 | 0 + 3 | 30 | 40 | 0 + 4 | 40 | 50 | 0 +(4 rows) + +DROP TABLE rpr_join1, rpr_join2; +-- ============================================================ +-- Complex Expression Tests +-- Files: createplan.c, setrefs.c +-- ============================================================ +-- Tests complex target list expressions +CREATE TABLE rpr_target (id INT, val INT); +INSERT INTO rpr_target VALUES (1, 10), (2, 20), (3, 30), (4, 40), (5, 50); +-- Expressions in Target List +SELECT id, + val * 2 as doubled, + val + 10 as added, + COUNT(*) OVER w as cnt +FROM rpr_target +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | doubled | added | cnt +----+---------+-------+----- + 1 | 20 | 20 | 5 + 2 | 40 | 30 | 0 + 3 | 60 | 40 | 0 + 4 | 80 | 50 | 0 + 5 | 100 | 60 | 0 +(5 rows) + +-- CASE Expression in Target List +SELECT id, val, + CASE + WHEN val < 30 THEN 'low' + WHEN val < 50 THEN 'medium' + ELSE 'high' + END as category, + COUNT(*) OVER w as cnt +FROM rpr_target +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | category | cnt +----+-----+----------+----- + 1 | 10 | low | 5 + 2 | 20 | low | 0 + 3 | 30 | medium | 0 + 4 | 40 | medium | 0 + 5 | 50 | high | 0 +(5 rows) + +-- Subquery in Target List +SELECT id, val, + (SELECT MAX(val) FROM rpr_target) as max_val, + COUNT(*) OVER w as cnt +FROM rpr_target +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | max_val | cnt +----+-----+---------+----- + 1 | 10 | 50 | 5 + 2 | 20 | 50 | 0 + 3 | 30 | 50 | 0 + 4 | 40 | 50 | 0 + 5 | 50 | 50 | 0 +(5 rows) + +-- Function Calls in Target List +SELECT 
id, val, + COALESCE(val, 0) as coalesced, + ABS(val - 30) as distance, + COUNT(*) OVER w as cnt +FROM rpr_target +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | coalesced | distance | cnt +----+-----+-----------+----------+----- + 1 | 10 | 10 | 20 | 5 + 2 | 20 | 20 | 10 | 0 + 3 | 30 | 30 | 0 | 0 + 4 | 40 | 40 | 10 | 0 + 5 | 50 | 50 | 20 | 0 +(5 rows) + +-- Column Aliases and References +SELECT id as row_id, + val as value, + COUNT(*) OVER w as cnt +FROM rpr_target +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY row_id; + row_id | value | cnt +--------+-------+----- + 1 | 10 | 5 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 +(5 rows) + +DROP TABLE rpr_target; +-- ============================================================ +-- Set Operations Tests +-- Files: planner.c +-- ============================================================ +-- Tests RPR with UNION, INTERSECT, EXCEPT +CREATE TABLE rpr_set1 (id INT, val INT); +CREATE TABLE rpr_set2 (id INT, val INT); +INSERT INTO rpr_set1 VALUES (1, 10), (2, 20), (3, 30); +INSERT INTO rpr_set2 VALUES (2, 20), (3, 30), (4, 40); +-- UNION with RPR +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +UNION +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set2 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 2 | 20 | 3 + 3 | 30 | 0 + 4 | 40 | 0 +(5 rows) + +-- UNION ALL with RPR +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +UNION ALL 
+(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set2 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +ORDER BY id, val; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 2 | 20 | 3 + 3 | 30 | 0 + 3 | 30 | 0 + 4 | 40 | 0 +(6 rows) + +-- INTERSECT with RPR +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +INTERSECT +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set2 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +ORDER BY id; + id | val | cnt +----+-----+----- + 3 | 30 | 0 +(1 row) + +-- EXCEPT with RPR +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +EXCEPT +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set2 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 +(2 rows) + +DROP TABLE rpr_set1, rpr_set2; +-- ============================================================ +-- Sorting and Grouping Tests +-- Files: planner.c, createplan.c +-- ============================================================ +-- Tests RPR interaction with sorting and grouping +CREATE TABLE rpr_sort (id INT, category VARCHAR(10), val INT); +INSERT INTO rpr_sort VALUES + (1, 'A', 30), (2, 'B', 20), (3, 'A', 10), + (4, 'B', 40), (5, 'A', 50), (6, 'B', 60); +-- RPR with GROUP BY +SELECT category, + COUNT(*) as group_cnt, + MAX(val) as max_val, + COUNT(*) OVER w as window_cnt +FROM rpr_sort +GROUP BY category +WINDOW w AS ( + ORDER BY category + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS 
COUNT(*) > 0 +) +ORDER BY category; +ERROR: aggregate functions are not allowed in DEFINE +LINE 11: DEFINE A AS COUNT(*) > 0 + ^ +-- RPR with HAVING +SELECT category, + COUNT(*) as group_cnt, + COUNT(*) OVER w as window_cnt +FROM rpr_sort +GROUP BY category +HAVING COUNT(*) > 2 +WINDOW w AS ( + ORDER BY category + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS COUNT(*) > 0 +) +ORDER BY category; +ERROR: aggregate functions are not allowed in DEFINE +LINE 11: DEFINE A AS COUNT(*) > 0 + ^ +-- RPR with DISTINCT +SELECT DISTINCT category, + COUNT(*) OVER w as cnt +FROM rpr_sort +WINDOW w AS ( + PARTITION BY category + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY category; + category | cnt +----------+----- + A | 3 + A | 0 + B | 0 + B | 3 +(4 rows) + +-- RPR with ORDER BY (different from window ORDER BY) +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_sort +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY val DESC; + id | category | val | cnt +----+----------+-----+----- + 6 | B | 60 | 0 + 5 | A | 50 | 0 + 4 | B | 40 | 0 + 1 | A | 30 | 6 + 2 | B | 20 | 0 + 3 | A | 10 | 0 +(6 rows) + +-- RPR with LIMIT and OFFSET +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_sort +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id +LIMIT 3 OFFSET 1; + id | category | val | cnt +----+----------+-----+----- + 2 | B | 20 | 0 + 3 | A | 10 | 0 + 4 | B | 40 | 0 +(3 rows) + +DROP TABLE rpr_sort; +DROP TABLE rpr_planner; +-- ============================================================ +-- Stress Tests +-- ============================================================ +-- Edge cases and stress scenarios +CREATE TABLE rpr_stress (id INT, val INT); +INSERT INTO rpr_stress SELECT i, i * 10 FROM generate_series(1, 20) 
i; +-- Very Long Query with Many Windows +SELECT id, val, + COUNT(*) OVER w1 as cnt1, + COUNT(*) OVER w2 as cnt2, + COUNT(*) OVER w3 as cnt3 +FROM rpr_stress +WINDOW w1 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +), +w2 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (B+) + DEFINE B AS val > 50 +), +w3 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (C+) + DEFINE C AS val > 100 +) +ORDER BY id; + id | val | cnt1 | cnt2 | cnt3 +----+-----+------+------+------ + 1 | 10 | 20 | 0 | 0 + 2 | 20 | 0 | 0 | 0 + 3 | 30 | 0 | 0 | 0 + 4 | 40 | 0 | 0 | 0 + 5 | 50 | 0 | 0 | 0 + 6 | 60 | 0 | 15 | 0 + 7 | 70 | 0 | 0 | 0 + 8 | 80 | 0 | 0 | 0 + 9 | 90 | 0 | 0 | 0 + 10 | 100 | 0 | 0 | 0 + 11 | 110 | 0 | 0 | 10 + 12 | 120 | 0 | 0 | 0 + 13 | 130 | 0 | 0 | 0 + 14 | 140 | 0 | 0 | 0 + 15 | 150 | 0 | 0 | 0 + 16 | 160 | 0 | 0 | 0 + 17 | 170 | 0 | 0 | 0 + 18 | 180 | 0 | 0 | 0 + 19 | 190 | 0 | 0 | 0 + 20 | 200 | 0 | 0 | 0 +(20 rows) + +-- Deeply Nested Subqueries with RPR +SELECT * FROM ( + SELECT * FROM ( + SELECT * FROM ( + SELECT id, val, + COUNT(*) OVER w as cnt + FROM rpr_stress + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) + ) sub1 + ) sub2 +) sub3 +WHERE cnt > 10 +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 20 +(1 row) + +-- Complex Expression in DEFINE Clause +SELECT id, val, + COUNT(*) OVER w as cnt +FROM rpr_stress +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ B) + DEFINE A AS (val % 3 = 0 OR val % 5 = 0), + B AS (val * 2 > 100 AND val / 2 < 100) +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 19 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 + 11 | 110 | 0 + 12 | 120 | 0 + 13 | 130 | 0 + 14 | 140 | 0 + 15 | 150 | 0 + 16 | 160 | 0 + 17 
| 170 | 0 + 18 | 180 | 0 + 19 | 190 | 0 + 20 | 200 | 0 +(20 rows) + +-- Window with No Matching Rows +SELECT id, val, + COUNT(*) OVER w as cnt +FROM rpr_stress +WHERE val > 1000 -- No rows match +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- +(0 rows) + +-- Window on Single Row +SELECT id, val, + COUNT(*) OVER w as cnt +FROM rpr_stress +WHERE id = 10 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 10 | 100 | 1 +(1 row) + +DROP TABLE rpr_stress; +-- ============================================================ +-- Error Limit Tests +-- ============================================================ +-- Tests for error conditions in rpr.c +CREATE TABLE rpr_errors (id INT, val INT); +INSERT INTO rpr_errors VALUES (1, 10), (2, 20); +-- Test: PATTERN variable without DEFINE (A), DEFINE variable not in PATTERN (B) +SELECT id, val, COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A) + DEFINE + B AS TRUE +); + id | val | count +----+-----+------- + 1 | 10 | 0 + 2 | 20 | 0 +(2 rows) + +-- Expected: Success - A is implicitly TRUE, B is filtered out +-- Test: 3 variables in PATTERN, 253 in DEFINE (DEFINE filtering test) +SELECT COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (V1 V2 V3) + DEFINE + V1 AS val > 0, V2 AS val > 0, V3 AS val > 0, V4 AS val > 0, V5 AS val > 0, V6 AS val > 0, V7 AS val > 0, V8 AS val > 0, V9 AS val > 0, V10 AS val > 0, + V11 AS val > 0, V12 AS val > 0, V13 AS val > 0, V14 AS val > 0, V15 AS val > 0, V16 AS val > 0, V17 AS val > 0, V18 AS val > 0, V19 AS val > 0, V20 AS val > 0, + V21 AS val > 0, V22 AS val > 0, V23 AS val > 0, V24 AS val > 0, V25 AS val > 0, V26 AS val 
> 0, V27 AS val > 0, V28 AS val > 0, V29 AS val > 0, V30 AS val > 0, + V31 AS val > 0, V32 AS val > 0, V33 AS val > 0, V34 AS val > 0, V35 AS val > 0, V36 AS val > 0, V37 AS val > 0, V38 AS val > 0, V39 AS val > 0, V40 AS val > 0, + V41 AS val > 0, V42 AS val > 0, V43 AS val > 0, V44 AS val > 0, V45 AS val > 0, V46 AS val > 0, V47 AS val > 0, V48 AS val > 0, V49 AS val > 0, V50 AS val > 0, + V51 AS val > 0, V52 AS val > 0, V53 AS val > 0, V54 AS val > 0, V55 AS val > 0, V56 AS val > 0, V57 AS val > 0, V58 AS val > 0, V59 AS val > 0, V60 AS val > 0, + V61 AS val > 0, V62 AS val > 0, V63 AS val > 0, V64 AS val > 0, V65 AS val > 0, V66 AS val > 0, V67 AS val > 0, V68 AS val > 0, V69 AS val > 0, V70 AS val > 0, + V71 AS val > 0, V72 AS val > 0, V73 AS val > 0, V74 AS val > 0, V75 AS val > 0, V76 AS val > 0, V77 AS val > 0, V78 AS val > 0, V79 AS val > 0, V80 AS val > 0, + V81 AS val > 0, V82 AS val > 0, V83 AS val > 0, V84 AS val > 0, V85 AS val > 0, V86 AS val > 0, V87 AS val > 0, V88 AS val > 0, V89 AS val > 0, V90 AS val > 0, + V91 AS val > 0, V92 AS val > 0, V93 AS val > 0, V94 AS val > 0, V95 AS val > 0, V96 AS val > 0, V97 AS val > 0, V98 AS val > 0, V99 AS val > 0, V100 AS val > 0, + V101 AS val > 0, V102 AS val > 0, V103 AS val > 0, V104 AS val > 0, V105 AS val > 0, V106 AS val > 0, V107 AS val > 0, V108 AS val > 0, V109 AS val > 0, V110 AS val > 0, + V111 AS val > 0, V112 AS val > 0, V113 AS val > 0, V114 AS val > 0, V115 AS val > 0, V116 AS val > 0, V117 AS val > 0, V118 AS val > 0, V119 AS val > 0, V120 AS val > 0, + V121 AS val > 0, V122 AS val > 0, V123 AS val > 0, V124 AS val > 0, V125 AS val > 0, V126 AS val > 0, V127 AS val > 0, V128 AS val > 0, V129 AS val > 0, V130 AS val > 0, + V131 AS val > 0, V132 AS val > 0, V133 AS val > 0, V134 AS val > 0, V135 AS val > 0, V136 AS val > 0, V137 AS val > 0, V138 AS val > 0, V139 AS val > 0, V140 AS val > 0, + V141 AS val > 0, V142 AS val > 0, V143 AS val > 0, V144 AS val > 0, V145 AS val > 0, V146 AS val > 0, 
V147 AS val > 0, V148 AS val > 0, V149 AS val > 0, V150 AS val > 0, + V151 AS val > 0, V152 AS val > 0, V153 AS val > 0, V154 AS val > 0, V155 AS val > 0, V156 AS val > 0, V157 AS val > 0, V158 AS val > 0, V159 AS val > 0, V160 AS val > 0, + V161 AS val > 0, V162 AS val > 0, V163 AS val > 0, V164 AS val > 0, V165 AS val > 0, V166 AS val > 0, V167 AS val > 0, V168 AS val > 0, V169 AS val > 0, V170 AS val > 0, + V171 AS val > 0, V172 AS val > 0, V173 AS val > 0, V174 AS val > 0, V175 AS val > 0, V176 AS val > 0, V177 AS val > 0, V178 AS val > 0, V179 AS val > 0, V180 AS val > 0, + V181 AS val > 0, V182 AS val > 0, V183 AS val > 0, V184 AS val > 0, V185 AS val > 0, V186 AS val > 0, V187 AS val > 0, V188 AS val > 0, V189 AS val > 0, V190 AS val > 0, + V191 AS val > 0, V192 AS val > 0, V193 AS val > 0, V194 AS val > 0, V195 AS val > 0, V196 AS val > 0, V197 AS val > 0, V198 AS val > 0, V199 AS val > 0, V200 AS val > 0, + V201 AS val > 0, V202 AS val > 0, V203 AS val > 0, V204 AS val > 0, V205 AS val > 0, V206 AS val > 0, V207 AS val > 0, V208 AS val > 0, V209 AS val > 0, V210 AS val > 0, + V211 AS val > 0, V212 AS val > 0, V213 AS val > 0, V214 AS val > 0, V215 AS val > 0, V216 AS val > 0, V217 AS val > 0, V218 AS val > 0, V219 AS val > 0, V220 AS val > 0, + V221 AS val > 0, V222 AS val > 0, V223 AS val > 0, V224 AS val > 0, V225 AS val > 0, V226 AS val > 0, V227 AS val > 0, V228 AS val > 0, V229 AS val > 0, V230 AS val > 0, + V231 AS val > 0, V232 AS val > 0, V233 AS val > 0, V234 AS val > 0, V235 AS val > 0, V236 AS val > 0, V237 AS val > 0, V238 AS val > 0, V239 AS val > 0, V240 AS val > 0, + V241 AS val > 0, V242 AS val > 0, V243 AS val > 0, V244 AS val > 0, V245 AS val > 0, V246 AS val > 0, V247 AS val > 0, V248 AS val > 0, V249 AS val > 0, V250 AS val > 0, + V251 AS val > 0, V252 AS val > 0, V253 AS val > 0 +); + count +------- + 0 + 0 +(2 rows) + +-- Expected: Success - unused DEFINE variables are filtered out +-- Test: 251 variables in PATTERN, 252 in DEFINE 
(boundary - should succeed) +SELECT COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65 V66 V67 V68 V69 V70 V71 V72 V73 V74 V75 V76 V77 V78 V79 V80 V81 V82 V83 V84 V85 V86 V87 V88 V89 V90 V91 V92 V93 V94 V95 V96 V97 V98 V99 V100 V101 V102 V103 V104 V105 V106 V107 V108 V109 V110 V111 V112 V113 V114 V115 V116 V117 V118 V119 V120 V121 V122 V123 V124 V125 V126 V127 V128 V129 V130 V131 V132 V133 V134 V135 V136 V137 V138 V139 V140 V141 V142 V143 V144 V145 V146 V147 V148 V149 V150 V151 V152 V153 V154 V155 V156 V157 V158 V159 V160 V161 V162 V163 V164 V165 V166 V167 V168 V169 V170 V171 V172 V173 V174 V175 V176 V177 V178 V179 V180 V181 V182 V183 V184 V185 V186 V187 V188 V189 V190 V191 V192 V193 V194 V195 V196 V197 V198 V199 V200 V201 V202 V203 V204 V205 V206 V207 V208 V209 V210 V211 V212 V213 V214 V215 V216 V217 V218 V219 V220 V221 V222 V223 V224 V225 V226 V227 V228 V229 V230 V231 V232 V233 V234 V235 V236 V237 V238 V239 V240 V241 V242 V243 V244 V245 V246 V247 V248 V249 V250 V251) + DEFINE + V1 AS val > 0, V2 AS val > 0, V3 AS val > 0, V4 AS val > 0, V5 AS val > 0, V6 AS val > 0, V7 AS val > 0, V8 AS val > 0, V9 AS val > 0, V10 AS val > 0, + V11 AS val > 0, V12 AS val > 0, V13 AS val > 0, V14 AS val > 0, V15 AS val > 0, V16 AS val > 0, V17 AS val > 0, V18 AS val > 0, V19 AS val > 0, V20 AS val > 0, + V21 AS val > 0, V22 AS val > 0, V23 AS val > 0, V24 AS val > 0, V25 AS val > 0, V26 AS val > 0, V27 AS val > 0, V28 AS val > 0, V29 AS val > 0, V30 AS val > 0, + V31 AS val > 0, V32 AS val > 0, V33 AS val > 0, V34 AS val > 0, V35 AS val > 0, V36 AS val > 0, V37 AS val > 0, V38 AS val > 0, V39 AS val > 0, V40 AS val > 0, + V41 AS val > 0, V42 AS val > 0, V43 AS val 
> 0, V44 AS val > 0, V45 AS val > 0, V46 AS val > 0, V47 AS val > 0, V48 AS val > 0, V49 AS val > 0, V50 AS val > 0, + V51 AS val > 0, V52 AS val > 0, V53 AS val > 0, V54 AS val > 0, V55 AS val > 0, V56 AS val > 0, V57 AS val > 0, V58 AS val > 0, V59 AS val > 0, V60 AS val > 0, + V61 AS val > 0, V62 AS val > 0, V63 AS val > 0, V64 AS val > 0, V65 AS val > 0, V66 AS val > 0, V67 AS val > 0, V68 AS val > 0, V69 AS val > 0, V70 AS val > 0, + V71 AS val > 0, V72 AS val > 0, V73 AS val > 0, V74 AS val > 0, V75 AS val > 0, V76 AS val > 0, V77 AS val > 0, V78 AS val > 0, V79 AS val > 0, V80 AS val > 0, + V81 AS val > 0, V82 AS val > 0, V83 AS val > 0, V84 AS val > 0, V85 AS val > 0, V86 AS val > 0, V87 AS val > 0, V88 AS val > 0, V89 AS val > 0, V90 AS val > 0, + V91 AS val > 0, V92 AS val > 0, V93 AS val > 0, V94 AS val > 0, V95 AS val > 0, V96 AS val > 0, V97 AS val > 0, V98 AS val > 0, V99 AS val > 0, V100 AS val > 0, + V101 AS val > 0, V102 AS val > 0, V103 AS val > 0, V104 AS val > 0, V105 AS val > 0, V106 AS val > 0, V107 AS val > 0, V108 AS val > 0, V109 AS val > 0, V110 AS val > 0, + V111 AS val > 0, V112 AS val > 0, V113 AS val > 0, V114 AS val > 0, V115 AS val > 0, V116 AS val > 0, V117 AS val > 0, V118 AS val > 0, V119 AS val > 0, V120 AS val > 0, + V121 AS val > 0, V122 AS val > 0, V123 AS val > 0, V124 AS val > 0, V125 AS val > 0, V126 AS val > 0, V127 AS val > 0, V128 AS val > 0, V129 AS val > 0, V130 AS val > 0, + V131 AS val > 0, V132 AS val > 0, V133 AS val > 0, V134 AS val > 0, V135 AS val > 0, V136 AS val > 0, V137 AS val > 0, V138 AS val > 0, V139 AS val > 0, V140 AS val > 0, + V141 AS val > 0, V142 AS val > 0, V143 AS val > 0, V144 AS val > 0, V145 AS val > 0, V146 AS val > 0, V147 AS val > 0, V148 AS val > 0, V149 AS val > 0, V150 AS val > 0, + V151 AS val > 0, V152 AS val > 0, V153 AS val > 0, V154 AS val > 0, V155 AS val > 0, V156 AS val > 0, V157 AS val > 0, V158 AS val > 0, V159 AS val > 0, V160 AS val > 0, + V161 AS val > 0, V162 AS val > 0, 
V163 AS val > 0, V164 AS val > 0, V165 AS val > 0, V166 AS val > 0, V167 AS val > 0, V168 AS val > 0, V169 AS val > 0, V170 AS val > 0, + V171 AS val > 0, V172 AS val > 0, V173 AS val > 0, V174 AS val > 0, V175 AS val > 0, V176 AS val > 0, V177 AS val > 0, V178 AS val > 0, V179 AS val > 0, V180 AS val > 0, + V181 AS val > 0, V182 AS val > 0, V183 AS val > 0, V184 AS val > 0, V185 AS val > 0, V186 AS val > 0, V187 AS val > 0, V188 AS val > 0, V189 AS val > 0, V190 AS val > 0, + V191 AS val > 0, V192 AS val > 0, V193 AS val > 0, V194 AS val > 0, V195 AS val > 0, V196 AS val > 0, V197 AS val > 0, V198 AS val > 0, V199 AS val > 0, V200 AS val > 0, + V201 AS val > 0, V202 AS val > 0, V203 AS val > 0, V204 AS val > 0, V205 AS val > 0, V206 AS val > 0, V207 AS val > 0, V208 AS val > 0, V209 AS val > 0, V210 AS val > 0, + V211 AS val > 0, V212 AS val > 0, V213 AS val > 0, V214 AS val > 0, V215 AS val > 0, V216 AS val > 0, V217 AS val > 0, V218 AS val > 0, V219 AS val > 0, V220 AS val > 0, + V221 AS val > 0, V222 AS val > 0, V223 AS val > 0, V224 AS val > 0, V225 AS val > 0, V226 AS val > 0, V227 AS val > 0, V228 AS val > 0, V229 AS val > 0, V230 AS val > 0, + V231 AS val > 0, V232 AS val > 0, V233 AS val > 0, V234 AS val > 0, V235 AS val > 0, V236 AS val > 0, V237 AS val > 0, V238 AS val > 0, V239 AS val > 0, V240 AS val > 0, + V241 AS val > 0, V242 AS val > 0, V243 AS val > 0, V244 AS val > 0, V245 AS val > 0, V246 AS val > 0, V247 AS val > 0, V248 AS val > 0, V249 AS val > 0, V250 AS val > 0, + V251 AS val > 0, V252 AS val > 0 +); + count +------- + 0 + 0 +(2 rows) + +-- Expected: Success - unused DEFINE variables are filtered out +-- Test: 252 variables in PATTERN, 251 in DEFINE (exceeds limit with implicit TRUE) +SELECT COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 
V36 V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65 V66 V67 V68 V69 V70 V71 V72 V73 V74 V75 V76 V77 V78 V79 V80 V81 V82 V83 V84 V85 V86 V87 V88 V89 V90 V91 V92 V93 V94 V95 V96 V97 V98 V99 V100 V101 V102 V103 V104 V105 V106 V107 V108 V109 V110 V111 V112 V113 V114 V115 V116 V117 V118 V119 V120 V121 V122 V123 V124 V125 V126 V127 V128 V129 V130 V131 V132 V133 V134 V135 V136 V137 V138 V139 V140 V141 V142 V143 V144 V145 V146 V147 V148 V149 V150 V151 V152 V153 V154 V155 V156 V157 V158 V159 V160 V161 V162 V163 V164 V165 V166 V167 V168 V169 V170 V171 V172 V173 V174 V175 V176 V177 V178 V179 V180 V181 V182 V183 V184 V185 V186 V187 V188 V189 V190 V191 V192 V193 V194 V195 V196 V197 V198 V199 V200 V201 V202 V203 V204 V205 V206 V207 V208 V209 V210 V211 V212 V213 V214 V215 V216 V217 V218 V219 V220 V221 V222 V223 V224 V225 V226 V227 V228 V229 V230 V231 V232 V233 V234 V235 V236 V237 V238 V239 V240 V241 V242 V243 V244 V245 V246 V247 V248 V249 V250 V251 V252) + DEFINE + V1 AS val > 0, V2 AS val > 0, V3 AS val > 0, V4 AS val > 0, V5 AS val > 0, V6 AS val > 0, V7 AS val > 0, V8 AS val > 0, V9 AS val > 0, V10 AS val > 0, + V11 AS val > 0, V12 AS val > 0, V13 AS val > 0, V14 AS val > 0, V15 AS val > 0, V16 AS val > 0, V17 AS val > 0, V18 AS val > 0, V19 AS val > 0, V20 AS val > 0, + V21 AS val > 0, V22 AS val > 0, V23 AS val > 0, V24 AS val > 0, V25 AS val > 0, V26 AS val > 0, V27 AS val > 0, V28 AS val > 0, V29 AS val > 0, V30 AS val > 0, + V31 AS val > 0, V32 AS val > 0, V33 AS val > 0, V34 AS val > 0, V35 AS val > 0, V36 AS val > 0, V37 AS val > 0, V38 AS val > 0, V39 AS val > 0, V40 AS val > 0, + V41 AS val > 0, V42 AS val > 0, V43 AS val > 0, V44 AS val > 0, V45 AS val > 0, V46 AS val > 0, V47 AS val > 0, V48 AS val > 0, V49 AS val > 0, V50 AS val > 0, + V51 AS val > 0, V52 AS val > 0, V53 AS val > 0, V54 AS val > 0, V55 AS val > 0, V56 AS val > 0, V57 AS val > 0, V58 AS val > 0, V59 AS val > 0, V60 AS val > 0, + V61 
AS val > 0, V62 AS val > 0, V63 AS val > 0, V64 AS val > 0, V65 AS val > 0, V66 AS val > 0, V67 AS val > 0, V68 AS val > 0, V69 AS val > 0, V70 AS val > 0, + V71 AS val > 0, V72 AS val > 0, V73 AS val > 0, V74 AS val > 0, V75 AS val > 0, V76 AS val > 0, V77 AS val > 0, V78 AS val > 0, V79 AS val > 0, V80 AS val > 0, + V81 AS val > 0, V82 AS val > 0, V83 AS val > 0, V84 AS val > 0, V85 AS val > 0, V86 AS val > 0, V87 AS val > 0, V88 AS val > 0, V89 AS val > 0, V90 AS val > 0, + V91 AS val > 0, V92 AS val > 0, V93 AS val > 0, V94 AS val > 0, V95 AS val > 0, V96 AS val > 0, V97 AS val > 0, V98 AS val > 0, V99 AS val > 0, V100 AS val > 0, + V101 AS val > 0, V102 AS val > 0, V103 AS val > 0, V104 AS val > 0, V105 AS val > 0, V106 AS val > 0, V107 AS val > 0, V108 AS val > 0, V109 AS val > 0, V110 AS val > 0, + V111 AS val > 0, V112 AS val > 0, V113 AS val > 0, V114 AS val > 0, V115 AS val > 0, V116 AS val > 0, V117 AS val > 0, V118 AS val > 0, V119 AS val > 0, V120 AS val > 0, + V121 AS val > 0, V122 AS val > 0, V123 AS val > 0, V124 AS val > 0, V125 AS val > 0, V126 AS val > 0, V127 AS val > 0, V128 AS val > 0, V129 AS val > 0, V130 AS val > 0, + V131 AS val > 0, V132 AS val > 0, V133 AS val > 0, V134 AS val > 0, V135 AS val > 0, V136 AS val > 0, V137 AS val > 0, V138 AS val > 0, V139 AS val > 0, V140 AS val > 0, + V141 AS val > 0, V142 AS val > 0, V143 AS val > 0, V144 AS val > 0, V145 AS val > 0, V146 AS val > 0, V147 AS val > 0, V148 AS val > 0, V149 AS val > 0, V150 AS val > 0, + V151 AS val > 0, V152 AS val > 0, V153 AS val > 0, V154 AS val > 0, V155 AS val > 0, V156 AS val > 0, V157 AS val > 0, V158 AS val > 0, V159 AS val > 0, V160 AS val > 0, + V161 AS val > 0, V162 AS val > 0, V163 AS val > 0, V164 AS val > 0, V165 AS val > 0, V166 AS val > 0, V167 AS val > 0, V168 AS val > 0, V169 AS val > 0, V170 AS val > 0, + V171 AS val > 0, V172 AS val > 0, V173 AS val > 0, V174 AS val > 0, V175 AS val > 0, V176 AS val > 0, V177 AS val > 0, V178 AS val > 0, V179 AS val > 
0, V180 AS val > 0, + V181 AS val > 0, V182 AS val > 0, V183 AS val > 0, V184 AS val > 0, V185 AS val > 0, V186 AS val > 0, V187 AS val > 0, V188 AS val > 0, V189 AS val > 0, V190 AS val > 0, + V191 AS val > 0, V192 AS val > 0, V193 AS val > 0, V194 AS val > 0, V195 AS val > 0, V196 AS val > 0, V197 AS val > 0, V198 AS val > 0, V199 AS val > 0, V200 AS val > 0, + V201 AS val > 0, V202 AS val > 0, V203 AS val > 0, V204 AS val > 0, V205 AS val > 0, V206 AS val > 0, V207 AS val > 0, V208 AS val > 0, V209 AS val > 0, V210 AS val > 0, + V211 AS val > 0, V212 AS val > 0, V213 AS val > 0, V214 AS val > 0, V215 AS val > 0, V216 AS val > 0, V217 AS val > 0, V218 AS val > 0, V219 AS val > 0, V220 AS val > 0, + V221 AS val > 0, V222 AS val > 0, V223 AS val > 0, V224 AS val > 0, V225 AS val > 0, V226 AS val > 0, V227 AS val > 0, V228 AS val > 0, V229 AS val > 0, V230 AS val > 0, + V231 AS val > 0, V232 AS val > 0, V233 AS val > 0, V234 AS val > 0, V235 AS val > 0, V236 AS val > 0, V237 AS val > 0, V238 AS val > 0, V239 AS val > 0, V240 AS val > 0, + V241 AS val > 0, V242 AS val > 0, V243 AS val > 0, V244 AS val > 0, V245 AS val > 0, V246 AS val > 0, V247 AS val > 0, V248 AS val > 0, V249 AS val > 0, V250 AS val > 0, + V251 AS val > 0 +); +ERROR: too many pattern variables +DETAIL: Maximum is 251. 
+-- Expected: ERROR - too many pattern variables (Maximum is 251) +-- Test: Pattern nesting at maximum depth (depth 253) +-- Note: 253 nested GROUP{3,7} quantifiers produce depth 253 after optimization +SELECT id, val, COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((A{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7})
{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}) + DEFINE A AS val > 0 +); + id | val | count +----+-----+------- + 1 | 10 | 0 + 2 | 20 | 0 +(2 rows) + +-- Expected: Should succeed +-- Test: Pattern nesting depth exceeds maximum (depth 254) +-- Note: 254 nested GROUP{3,7} quantifiers produce depth 254 after optimization +SELECT id, val, COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((A{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}
){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}) + DEFINE A AS val > 0 +); +ERROR: pattern nesting too deep +DETAIL: Pattern nesting depth 254 exceeds maximum 253. +-- Expected: ERROR - pattern nesting too deep +DROP TABLE rpr_errors; +-- ============================================================ +-- Jacob's Patterns +-- ============================================================ +-- Basic pattern matching tests from jacob branch +-- Test: A? (optional, greedy) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A?) + DEFINE A AS val > 50 +); + id | val | c +----+-----+--- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 1 + 7 | 70 | 1 + 8 | 80 | 1 + 9 | 90 | 1 + 10 | 100 | 1 +(10 rows) + +-- Test: A{2} (exact count) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2}) + DEFINE A AS val <= 50 +); + id | val | c +----+-----+--- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 30 | 2 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Test: A{1,3} (bounded range, greedy) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{1,3}) + DEFINE A AS val <= 50 +); + id | val | c +----+-----+--- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 2 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Test: A | B (simple 
alternation) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A | B) + DEFINE A AS val <= 30, B AS val > 70 +); + id | val | c +----+-----+--- + 1 | 10 | 1 + 2 | 20 | 1 + 3 | 30 | 1 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 1 + 9 | 90 | 1 + 10 | 100 | 1 +(10 rows) + +-- Test: A | B | C (three-way alternation) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A | B | C) + DEFINE A AS val <= 20, B AS val BETWEEN 40 AND 60, C AS val > 80 +); + id | val | c +----+-----+--- + 1 | 10 | 1 + 2 | 20 | 1 + 3 | 30 | 0 + 4 | 40 | 1 + 5 | 50 | 1 + 6 | 60 | 1 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 1 + 10 | 100 | 1 +(10 rows) + +-- Test: A B C (concatenation) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS val <= 30, B AS val BETWEEN 31 AND 60, C AS val > 60 +); + id | val | c +----+-----+--- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Test: A B? C (optional middle) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B? 
C) + DEFINE A AS val <= 30, B AS val BETWEEN 31 AND 60, C AS val > 60 +); + id | val | c +----+-----+--- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Test: (A B)+ (grouped quantifier) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS val <= 50, B AS val > 50 +); + id | val | c +----+-----+--- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 2 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Test: (A | B)+ C (alternation with quantifier) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS val <= 30, B AS val BETWEEN 31 AND 60, C AS val > 80 +); + id | val | c +----+-----+--- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Test: (A+ | (A | B)+)* - nested alternation inside quantified group +-- Previously caused infinite recursion in nfa_advance_alt when the inner +-- BEGIN(+)'s skip jump was followed as an ALT branch pointer. 
+SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM (VALUES + (1, ARRAY['A', 'B']), + (2, ARRAY['B']), + (3, ARRAY['C']) +) AS t(id, flags) +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A+ | (A | B)+)*) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 2 + 2 | {B} | | + 3 | {C} | | +(3 rows) + +-- ============================================================ +-- Pathological Patterns +-- ============================================================ +-- These patterns previously caused issues. Now optimized or handled safely. +-- Test: (A*)* - nested unbounded (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A*)*) + DEFINE A AS TRUE +); + v | c +---+--- + 1 | 5 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 +(5 rows) + +-- Test: (A*)+ - inner nullable (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A*)+) + DEFINE A AS TRUE +); + v | c +---+--- + 1 | 5 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 +(5 rows) + +-- Test: (A+)* - outer nullable (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A+)*) + DEFINE A AS TRUE +); + v | c +---+--- + 1 | 5 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 +(5 rows) + +-- Test: (A+)+ - both require match (optimized to A+) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP 
PAST LAST ROW + INITIAL + PATTERN ((A+)+) + DEFINE A AS TRUE +); + v | c +---+--- + 1 | 5 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 +(5 rows) + +-- Test: (((A)*)*)* - triple nested (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 3) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((((A)*)*)*) + DEFINE A AS TRUE +); + v | c +---+--- + 1 | 3 + 2 | 0 + 3 | 0 +(3 rows) + +-- Optional group with alternation: A ((B | C) (D | E))* F? +-- When only A matches, the * group matches 0 times and F? matches 0 times +SELECT id, val, match_len +FROM (SELECT id, val, + COUNT(*) OVER w AS match_len + FROM (VALUES (1, 1), (2, 99)) AS t(id, val) + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A ((B | C) (D | E))* F?) + DEFINE A AS val = 1, + B AS val = 2, C AS val = 3, + D AS val = 4, E AS val = 5, + F AS val = 6 + ) +) s; + id | val | match_len +----+-----+----------- + 1 | 1 | 1 + 2 | 99 | 0 +(2 rows) + +DROP TABLE rpr_plan; +-- ============================================================ +-- End of rpr_base.sql +-- ============================================================ diff --git a/src/test/regress/expected/rpr_explain.out b/src/test/regress/expected/rpr_explain.out new file mode 100644 index 00000000000..f23d06f6d59 --- /dev/null +++ b/src/test/regress/expected/rpr_explain.out @@ -0,0 +1,3863 @@ +-- ============================================================ +-- RPR EXPLAIN Tests +-- Tests for Row Pattern Recognition EXPLAIN output +-- ============================================================ +-- +-- This test suite validates EXPLAIN output for RPR queries, +-- including NFA statistics shown in EXPLAIN ANALYZE: +-- - NFA States: peak, total, merged +-- - NFA Contexts: peak, total, absorbed, skipped +-- - NFA: matched (len min/max/avg), mismatched (len min/max/avg) +-- - Pattern deparse formatting 
+-- - Multiple output formats (text, JSON, XML)
+--
+-- Test Coverage:
+-- Basic NFA Statistics Tests
+-- State Statistics Tests
+-- Context Statistics Tests
+-- Match Length Statistics Tests
+-- Mismatch Length Statistics Tests
+-- JSON Format Tests
+-- XML Format Tests
+-- Multiple Partitions Tests
+-- Edge Cases
+-- Complex Pattern Tests
+-- Real-world Pattern Examples
+-- Performance-oriented Tests
+-- INITIAL vs no INITIAL comparison
+-- Quantifier Variations
+-- Regression Tests for Statistics Accuracy
+-- Alternation Pattern Tests
+-- Group Pattern Tests
+-- Window Function Combinations
+-- DEFINE Expression Variations
+-- Large Scale Statistics Verification
+-- ============================================================
+-- Filter function to normalize Storage memory values only (not NFA statistics).
+-- NFA statistics should not change between platforms; if they do, it could
+-- indicate issues such as uninitialized memory access.
+-- Works for text, JSON, and XML formats.
+create function rpr_explain_filter(text) returns setof text
+language plpgsql as
+$$
+declare
+    ln text;
+begin
+    for ln in execute $1
+    loop
+        -- Normalize memory size in Storage line only (platform-dependent)
+        -- Keep NFA statistics numbers unchanged (they are test assertions)
+
+        -- Text format: "Storage: Memory Maximum Storage: 18kB"
+        if ln ~ 'Storage:.*Maximum Storage:' then
+            ln := regexp_replace(ln, '\m\d+kB', 'NkB', 'g');
+        end if;
+
+        -- JSON format: "Maximum Storage": 17 (number in kB units)
+        if ln ~ '"Maximum Storage":' then
+            ln := regexp_replace(ln, '"Maximum Storage": \d+', '"Maximum Storage": 0', 'g');
+        end if;
+
+        -- XML format: <Maximum-Storage>17</Maximum-Storage> (number in kB units)
+        if ln ~ '<Maximum-Storage>' then
+            ln := regexp_replace(ln, '<Maximum-Storage>\d+</Maximum-Storage>', '<Maximum-Storage>0</Maximum-Storage>', 'g');
+        end if;
+
+        return next ln;
+    end loop;
+end;
+$$;
+-- Setup: Create test tables
+CREATE TEMP TABLE nfa_test (
+    id serial,
+    v int,
+    cat char(1)
+);
+-- Insert test data: 100 rows with predictable pattern
+INSERT INTO nfa_test (v, cat)
+SELECT i,
CASE + WHEN i % 5 = 1 THEN 'A' + WHEN i % 5 = 2 THEN 'B' + WHEN i % 5 = 3 THEN 'C' + WHEN i % 5 = 4 THEN 'D' + ELSE 'E' + END +FROM generate_series(1, 100) i; +-- Additional test table with more complex patterns +CREATE TEMP TABLE nfa_complex ( + id serial, + price int, + trend char(1) -- U=up, D=down, S=stable +); +INSERT INTO nfa_complex (price, trend) +VALUES + (100, 'S'), (105, 'U'), (110, 'U'), (108, 'D'), (112, 'U'), + (115, 'U'), (113, 'D'), (111, 'D'), (109, 'D'), (110, 'U'), + (120, 'U'), (125, 'U'), (130, 'U'), (128, 'D'), (126, 'D'), + (124, 'D'), (122, 'D'), (120, 'D'), (118, 'D'), (119, 'U'), + (121, 'U'), (123, 'U'), (125, 'U'), (127, 'U'), (129, 'U'), + (131, 'U'), (133, 'U'), (130, 'D'), (127, 'D'), (124, 'D'); +-- ============================================================ +-- Basic NFA Statistics Tests +-- ============================================================ +-- Simple pattern - should show basic statistics +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS cat = 'A', B AS cat = 'B' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------ + PATTERN (a b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS cat = ''A'', B AS cat = ''B'' +)'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 101 total, 0 merged + NFA Contexts: 2 peak, 101 total, 60 pruned + NFA: 20 matched (len 
2/2/2.0), 0 mismatched + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) + -> Seq Scan on nfa_test (actual rows=100.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Pattern with no matches - 0 matched +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (X Y Z) + DEFINE X AS cat = 'X', Y AS cat = 'Y', Z AS cat = 'Z' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (x y z) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (X Y Z) + DEFINE X AS cat = ''X'', Y AS cat = ''Y'', Z AS cat = ''Z'' +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: x y z + Storage: Memory Maximum Storage: NkB + NFA States: 1 peak, 101 total, 0 merged + NFA Contexts: 2 peak, 101 total, 100 pruned + NFA: 0 matched, 0 mismatched + -> Seq Scan on nfa_test (actual rows=100.00 loops=1) +(8 rows) + +DROP VIEW rpr_v; +-- Pattern matching every row - high match count +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (R) + DEFINE R AS TRUE +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------- + PATTERN (r) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER 
MATCH SKIP PAST LAST ROW + PATTERN (R) + DEFINE R AS TRUE +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: r + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 101 total, 0 merged + NFA Contexts: 2 peak, 101 total, 0 pruned + NFA: 100 matched (len 1/1/1.0), 0 mismatched + -> Seq Scan on nfa_test (actual rows=100.00 loops=1) +(8 rows) + +DROP VIEW rpr_v; +-- Regression test: Space before parenthesis in pattern deparse +-- Verifies that "A (B | C)" correctly outputs as "a (b | c)" with space +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A (B | C)) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------ + PATTERN (a (b | c)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A (B | C)) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=20.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a (b | c) + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 28 total, 0 merged + NFA Contexts: 2 peak, 21 total, 6 pruned + NFA: 7 matched (len 2/2/2.0), 0 mismatched + NFA: 0 absorbed, 7 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=20.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Regression test: Sequential alternations at same depth +-- Verifies that "((B 
| C) (D | E))" correctly outputs as "(b | c) (d | e)" +-- Previously failed due to missing parentheses on ALT depth decrease +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) (D | E))*) + DEFINE A AS v % 5 = 1, B AS v % 5 = 2, C AS v % 5 = 3, D AS v % 5 = 4, E AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------------------- + PATTERN (a ((b | c) (d | e))*) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) (D | E))*) + DEFINE A AS v % 5 = 1, B AS v % 5 = 2, C AS v % 5 = 3, D AS v % 5 = 4, E AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a ((b | c) (d | e))* + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 49 total, 0 merged + NFA Contexts: 3 peak, 31 total, 24 pruned + NFA: 6 matched (len 1/1/1.0), 0 mismatched + -> Function Scan on generate_series s (actual rows=30.00 loops=1) +(8 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- State Statistics Tests (peak, total, merged) +-- ============================================================ +-- Simple quantifier pattern - A+ with short matches (no merging) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS v % 2 = 1 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) 
AS line WHERE line ~ 'PATTERN'; + line +----------------- + PATTERN (a+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS v % 2 = 1 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 76 total, 0 merged + NFA Contexts: 3 peak, 51 total, 25 pruned + NFA: 25 matched (len 1/1/1.0), 0 mismatched + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(8 rows) + +DROP VIEW rpr_v; +-- Alternation pattern - multiple state branches +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C) (D | E)) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------------------- + PATTERN ((a | b | c) (d | e)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C) (D | E)) + DEFINE + A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', + D AS cat = ''D'', E AS cat = ''E'' +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b | c) 
(d | e) + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 363 total, 0 merged + NFA Contexts: 3 peak, 101 total, 20 pruned + NFA: 20 matched (len 2/2/2.0), 40 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) + -> Seq Scan on nfa_test (actual rows=100.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Complex pattern with high state count +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B* C+) + DEFINE + A AS v % 3 = 1, + B AS v % 3 = 2, + C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN (a+ b* c+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B* C+) + DEFINE + A AS v % 3 = 1, + B AS v % 3 = 2, + C AS v % 3 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b* c+ + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 235 total, 0 merged + NFA Contexts: 3 peak, 101 total, 34 pruned + NFA: 33 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 33 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Grouped pattern with quantifier - state merging +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); 
+SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN ((a b)+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b')+" + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 91 total, 0 merged + NFA Contexts: 2 peak, 61 total, 0 pruned + NFA: 1 matched (len 60/60/60.0), 0 mismatched + NFA: 29 absorbed (len 1/1/1.0), 30 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- State explosion pattern - many alternations +-- Pattern (A|B)(A|B)(A|B)(A|B) can create many parallel states +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------------------------------------------------------------ + PATTERN ((a | b) (a | b) (a | b) (a | b) (a | b) (a | b) (a | b) (a | b)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + 
PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b){8} + Storage: Memory Maximum Storage: NkB + NFA States: 16 peak, 548 total, 0 merged + NFA Contexts: 8 peak, 101 total, 1 pruned + NFA: 12 matched (len 8/8/8.0), 3 mismatched (len 2/4/3.0) + NFA: 0 absorbed, 84 skipped (len 1/7/4.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Consecutive ALT merge followed by different ALT +-- Tests mergeConsecutiveAlts flush on ALT change: (A|B){2} (C|D) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------------- + PATTERN ((a | b) (a | b) (c | d)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b){2} (c | d) + Storage: Memory Maximum Storage: NkB + NFA States: 6 peak, 111 total, 0 merged + NFA Contexts: 3 peak, 41 
total, 12 pruned + NFA: 9 matched (len 3/3/3.0), 1 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 18 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Consecutive ALT merge followed by non-ALT element +-- Tests mergeConsecutiveAlts flush on non-ALT: (A|B){2} c +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------- + PATTERN ((a | b) (a | b) c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b){2} c + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 109 total, 0 merged + NFA Contexts: 3 peak, 41 total, 2 pruned + NFA: 12 matched (len 3/3/3.0), 2 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 24 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- ALT prefix/suffix absorbed into GROUP: (A|B) (A|B)+ (A|B) -> (A|B){3,} +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B)+ (A | B)) + 
DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------------------------- + PATTERN ((a | b) (a | b)+ (a | b)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B)+ (A | B)) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b){3,} + Storage: Memory Maximum Storage: NkB + NFA States: 6 peak, 161 total, 0 merged + NFA Contexts: 3 peak, 41 total, 0 pruned + NFA: 1 matched (len 40/40/40.0), 0 mismatched + NFA: 0 absorbed, 39 skipped (len 1/2/1.0) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- High state merging - alternation with plus quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C)+ D) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3, D AS v % 4 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------------- + PATTERN ((a | b | c)+ d) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C)+ D) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3, D AS 
v % 4 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b | c)+ d + Storage: Memory Maximum Storage: NkB + NFA States: 15 peak, 753 total, 0 merged + NFA Contexts: 4 peak, 101 total, 0 pruned + NFA: 25 matched (len 4/4/4.0), 0 mismatched + NFA: 0 absorbed, 75 skipped (len 1/3/2.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Nested quantifiers causing state growth +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A | B)+)+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN (((a | b)+)+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A | B)+)+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2 +);'); + rpr_explain_filter +------------------------------------------------------------------------ + WindowAgg (actual rows=1000.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b)+ + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 3336 total, 0 merged + NFA Contexts: 3 peak, 1001 total, 333 pruned + NFA: 334 matched (len 1/2/2.0), 0 mismatched + NFA: 0 absorbed, 333 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=1000.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- Context Statistics 
Tests (peak, total, absorbed, skipped) +-- ============================================================ +-- Context absorption with unbounded quantifier at start +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 91 total, 0 merged + NFA Contexts: 2 peak, 51 total, 0 pruned + NFA: 10 matched (len 5/5/5.0), 0 mismatched + NFA: 30 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- No absorption - bounded quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN (a{2,4} b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) 
+SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2,4} b + Storage: Memory Maximum Storage: NkB + NFA States: 7 peak, 101 total, 0 merged + NFA Contexts: 5 peak, 51 total, 0 pruned + NFA: 10 matched (len 5/5/5.0), 0 mismatched + NFA: 0 absorbed, 40 skipped (len 1/4/2.5) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Contexts skipped by SKIP PAST LAST ROW +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v % 10 = 1, B AS v % 10 = 2, C AS v % 10 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (a b c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v % 10 = 1, B AS v % 10 = 2, C AS v % 10 = 3 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 101 total, 0 merged + NFA Contexts: 3 peak, 101 total, 80 pruned + NFA: 10 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 10 skipped 
(len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- High context absorption - unbounded group +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN ((a b)+ c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b')+" c + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 134 total, 0 merged + NFA Contexts: 3 peak, 101 total, 34 pruned + NFA: 33 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 33 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- Match Length Statistics Tests +-- ============================================================ +-- Fixed length matches - all same length +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM 
unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------ + PATTERN (a b c d e) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', + D AS cat = ''D'', E AS cat = ''E'' +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c d e + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 101 total, 0 merged + NFA Contexts: 3 peak, 101 total, 60 pruned + NFA: 20 matched (len 5/5/5.0), 0 mismatched + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) + -> Seq Scan on nfa_test (actual rows=100.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Variable length matches - min/max/avg differ +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT 
ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 191 total, 0 merged + NFA Contexts: 2 peak, 101 total, 0 pruned + NFA: 10 matched (len 10/10/10.0), 0 mismatched + NFA: 80 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Very long matches +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 200) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v <= 195, B AS v > 195 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 200) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v <= 195, B AS v > 195 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=200.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 396 total, 0 merged + NFA Contexts: 2 peak, 201 total, 4 pruned + NFA: 1 matched (len 196/196/196.0), 0 mismatched + NFA: 194 absorbed (len 1/1/1.0), 1 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=200.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Mix of short and long matches +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS (v % 20 <> 0) AND (v % 20 <= 10 OR v % 20 > 15), + B AS v % 20 = 
0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS (v % 20 <> 0) AND (v % 20 <= 10 OR v % 20 > 15), + B AS v % 20 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 171 total, 0 merged + NFA Contexts: 3 peak, 101 total, 25 pruned + NFA: 5 matched (len 5/5/5.0), 5 mismatched (len 11/11/11.0) + NFA: 60 absorbed (len 1/1/1.0), 5 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- Mismatch Length Statistics Tests +-- ============================================================ +-- Pattern that causes mismatches with length > 1 +-- Mismatch happens when partial match fails after processing multiple rows +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT v, + CASE WHEN v % 10 IN (1,2,3) THEN 'A' + WHEN v % 10 IN (4,5) THEN 'B' + WHEN v % 10 = 6 THEN 'C' + ELSE 'X' END AS cat + FROM generate_series(1, 100) AS s(v) +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+ C) + DEFINE A AS cat = 'A', B AS cat = 'B', C AS cat = 'C' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------- + PATTERN (a+ b+ c) +(1 row) + +SELECT 
rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM ( + SELECT v, + CASE WHEN v % 10 IN (1,2,3) THEN ''A'' + WHEN v % 10 IN (4,5) THEN ''B'' + WHEN v % 10 = 6 THEN ''C'' + ELSE ''X'' END AS cat + FROM generate_series(1, 100) AS s(v) +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+ C) + DEFINE A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'' +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b+ c + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 151 total, 0 merged + NFA Contexts: 3 peak, 101 total, 60 pruned + NFA: 10 matched (len 6/6/6.0), 0 mismatched + NFA: 20 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Long partial matches that fail +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT i AS v, + CASE + WHEN i <= 20 THEN 'A' + WHEN i <= 25 THEN 'B' + WHEN i = 26 THEN 'X' -- breaks the pattern + WHEN i <= 50 THEN 'A' + WHEN i <= 55 THEN 'B' + WHEN i = 56 THEN 'C' -- completes pattern + ELSE 'Y' + END AS cat + FROM generate_series(1, 60) i +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+ C) + DEFINE A AS cat = 'A', B AS cat = 'B', C AS cat = 'C' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------- + PATTERN (a+ b+ c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM ( + SELECT i AS v, + CASE + WHEN i <= 20 THEN ''A'' + WHEN i <= 25 THEN ''B'' + WHEN i = 26 THEN ''X'' -- breaks the 
pattern + WHEN i <= 50 THEN ''A'' + WHEN i <= 55 THEN ''B'' + WHEN i = 56 THEN ''C'' -- completes pattern + ELSE ''Y'' + END AS cat + FROM generate_series(1, 60) i +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+ C) + DEFINE A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'' +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b+ c + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 115 total, 0 merged + NFA Contexts: 3 peak, 61 total, 15 pruned + NFA: 1 matched (len 30/30/30.0), 1 mismatched (len 26/26/26.0) + NFA: 42 absorbed (len 1/1/1.0), 1 skipped (len 1/1/1.0) + -> Function Scan on generate_series i (actual rows=60.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- JSON Format Tests +-- ============================================================ +-- JSON format output with all statistics +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (a+ b+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2 +)'); + rpr_explain_filter +---------------------------------------------------------------------------- + [ + + { + + "Plan": { + + "Node 
Type": "WindowAgg", + + "Parallel Aware": false, + + "Async Capable": false, + + "Actual Rows": 50.00, + + "Actual Loops": 1, + + "Disabled": false, + + "Window": "w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)",+ + "Pattern": "a+\" b+", + + "Storage": "Memory", + + "Maximum Storage": 0, + + "NFA States Peak": 3, + + "NFA States Total": 85, + + "NFA States Merged": 0, + + "NFA Contexts Peak": 3, + + "NFA Contexts Total": 51, + + "NFA Contexts Absorbed": 0, + + "NFA Contexts Skipped": 17, + + "NFA Contexts Pruned": 16, + + "NFA Matched": 17, + + "NFA Mismatched": 0, + + "NFA Match Length Min": 2, + + "NFA Match Length Max": 2, + + "NFA Match Length Avg": 2.0, + + "NFA Skipped Length Min": 1, + + "NFA Skipped Length Max": 1, + + "NFA Skipped Length Avg": 1.0, + + "Plans": [ + + { + + "Node Type": "Function Scan", + + "Parent Relationship": "Outer", + + "Parallel Aware": false, + + "Async Capable": false, + + "Function Name": "generate_series", + + "Alias": "s", + + "Actual Rows": 50.00, + + "Actual Loops": 1, + + "Disabled": false + + } + + ] + + }, + + "Triggers": [ + + ] + + } + + ] +(1 row) + +DROP VIEW rpr_v; +-- JSON format with match length statistics +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +)'); + rpr_explain_filter 
+---------------------------------------------------------------------------- + [ + + { + + "Plan": { + + "Node Type": "WindowAgg", + + "Parallel Aware": false, + + "Async Capable": false, + + "Actual Rows": 100.00, + + "Actual Loops": 1, + + "Disabled": false, + + "Window": "w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)",+ + "Pattern": "a+\" b", + + "Storage": "Memory", + + "Maximum Storage": 0, + + "NFA States Peak": 3, + + "NFA States Total": 191, + + "NFA States Merged": 0, + + "NFA Contexts Peak": 2, + + "NFA Contexts Total": 101, + + "NFA Contexts Absorbed": 80, + + "NFA Contexts Skipped": 10, + + "NFA Contexts Pruned": 0, + + "NFA Matched": 10, + + "NFA Mismatched": 0, + + "NFA Match Length Min": 10, + + "NFA Match Length Max": 10, + + "NFA Match Length Avg": 10.0, + + "NFA Absorbed Length Min": 1, + + "NFA Absorbed Length Max": 1, + + "NFA Absorbed Length Avg": 1.0, + + "NFA Skipped Length Min": 1, + + "NFA Skipped Length Max": 1, + + "NFA Skipped Length Avg": 1.0, + + "Plans": [ + + { + + "Node Type": "Function Scan", + + "Parent Relationship": "Outer", + + "Parallel Aware": false, + + "Async Capable": false, + + "Function Name": "generate_series", + + "Alias": "s", + + "Actual Rows": 100.00, + + "Actual Loops": 1, + + "Disabled": false + + } + + ] + + }, + + "Triggers": [ + + ] + + } + + ] +(1 row) + +DROP VIEW rpr_v; +-- ============================================================ +-- XML Format Tests +-- ============================================================ +-- XML format output +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------ + PATTERN (a b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS 
OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT XML) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +)'); + rpr_explain_filter +-------------------------------------------------------------------------------- + + + + + + + WindowAgg + + false + + false + + 30.00 + + 1 + + false + + w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)+ + a b + + Memory + + 0 + + 2 + + 31 + + 0 + + 2 + + 31 + + 0 + + 15 + + 0 + + 15 + + 0 + + 2 + + 2 + + 2.0 + + 1 + + 1 + + 1.0 + + + + + + Function Scan + + Outer + + false + + false + + generate_series + + s + + 30.00 + + 1 + + false + + + + + + + + + + + + + + +(1 row) + +DROP VIEW rpr_v; +-- JSON format with mismatch statistics +-- Pattern A B C expects 1,2,3 but gets 1,2,4 twice causing mismatches +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM (VALUES (1),(2),(4), (1),(2),(4), (1),(2),(3)) AS t(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v = 1, B AS v = 2, C AS v = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (a b c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM (VALUES (1),(2),(4), (1),(2),(4), (1),(2),(3)) AS t(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v = 1, B AS v = 2, C AS v = 3 +)'); + rpr_explain_filter +---------------------------------------------------------------------------- + [ + + { + + "Plan": { + + "Node Type": "WindowAgg", + + "Parallel Aware": false, + + "Async Capable": false, + + "Actual Rows": 9.00, + + "Actual Loops": 1, + + "Disabled": false, + 
+ "Window": "w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)",+ + "Pattern": "a b c", + + "Storage": "Memory", + + "Maximum Storage": 0, + + "NFA States Peak": 2, + + "NFA States Total": 10, + + "NFA States Merged": 0, + + "NFA Contexts Peak": 3, + + "NFA Contexts Total": 10, + + "NFA Contexts Absorbed": 0, + + "NFA Contexts Skipped": 1, + + "NFA Contexts Pruned": 5, + + "NFA Matched": 1, + + "NFA Mismatched": 2, + + "NFA Match Length Min": 3, + + "NFA Match Length Max": 3, + + "NFA Match Length Avg": 3.0, + + "NFA Mismatch Length Min": 3, + + "NFA Mismatch Length Max": 3, + + "NFA Mismatch Length Avg": 3.0, + + "NFA Skipped Length Min": 1, + + "NFA Skipped Length Max": 1, + + "NFA Skipped Length Avg": 1.0, + + "Plans": [ + + { + + "Node Type": "Values Scan", + + "Parent Relationship": "Outer", + + "Parallel Aware": false, + + "Async Capable": false, + + "Alias": "*VALUES*", + + "Actual Rows": 9.00, + + "Actual Loops": 1, + + "Disabled": false + + } + + ] + + }, + + "Triggers": [ + + ] + + } + + ] +(1 row) + +DROP VIEW rpr_v; +-- JSON format with skipped context statistics +-- Alternation pattern with SKIP PAST LAST ROW causes many contexts to be skipped +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------------------------------------------------------------ + PATTERN ((a | b) (a | b) (a | b) (a | b) (a | b) (a | b) (a | b) (a | b)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED 
FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +)'); + rpr_explain_filter +---------------------------------------------------------------------------- + [ + + { + + "Plan": { + + "Node Type": "WindowAgg", + + "Parallel Aware": false, + + "Async Capable": false, + + "Actual Rows": 100.00, + + "Actual Loops": 1, + + "Disabled": false, + + "Window": "w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)",+ + "Pattern": "(a | b){8}", + + "Storage": "Memory", + + "Maximum Storage": 0, + + "NFA States Peak": 16, + + "NFA States Total": 548, + + "NFA States Merged": 0, + + "NFA Contexts Peak": 8, + + "NFA Contexts Total": 101, + + "NFA Contexts Absorbed": 0, + + "NFA Contexts Skipped": 84, + + "NFA Contexts Pruned": 1, + + "NFA Matched": 12, + + "NFA Mismatched": 3, + + "NFA Match Length Min": 8, + + "NFA Match Length Max": 8, + + "NFA Match Length Avg": 8.0, + + "NFA Mismatch Length Min": 2, + + "NFA Mismatch Length Max": 4, + + "NFA Mismatch Length Avg": 3.0, + + "NFA Skipped Length Min": 1, + + "NFA Skipped Length Max": 7, + + "NFA Skipped Length Avg": 4.0, + + "Plans": [ + + { + + "Node Type": "Function Scan", + + "Parent Relationship": "Outer", + + "Parallel Aware": false, + + "Async Capable": false, + + "Function Name": "generate_series", + + "Alias": "s", + + "Actual Rows": 100.00, + + "Actual Loops": 1, + + "Disabled": false + + } + + ] + + }, + + "Triggers": [ + + ] + + } + + ] +(1 row) + +DROP VIEW rpr_v; +-- ============================================================ +-- Multiple Partitions Tests +-- ============================================================ +-- Statistics across multiple partitions +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT p, v + FROM generate_series(1, 3) p, + generate_series(1, 30) v +) t +WINDOW w AS ( + PARTITION BY p + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST 
ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM ( + SELECT p, v + FROM generate_series(1, 3) p, + generate_series(1, 30) v +) t +WINDOW w AS ( + PARTITION BY p + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +------------------------------------------------------------------------------------ + WindowAgg (actual rows=90.00 loops=1) + Window: w AS (PARTITION BY p.p ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 165 total, 0 merged + NFA Contexts: 2 peak, 93 total, 0 pruned + NFA: 18 matched (len 5/5/5.0), 0 mismatched + NFA: 54 absorbed (len 1/1/1.0), 18 skipped (len 1/1/1.0) + -> Sort (actual rows=90.00 loops=1) + Sort Key: p.p + Sort Method: quicksort Memory: 27kB + -> Nested Loop (actual rows=90.00 loops=1) + -> Function Scan on generate_series p (actual rows=3.00 loops=1) + -> Function Scan on generate_series v (actual rows=30.00 loops=3) +(14 rows) + +DROP VIEW rpr_v; +-- Different pattern behavior per partition +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT + CASE WHEN v <= 25 THEN 1 ELSE 2 END AS p, + v % 10 AS val + FROM generate_series(1, 50) v +) t +WINDOW w AS ( + PARTITION BY p + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS val < 5, B AS val >= 5 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS 
OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM ( + SELECT + CASE WHEN v <= 25 THEN 1 ELSE 2 END AS p, + v % 10 AS val + FROM generate_series(1, 50) v +) t +WINDOW w AS ( + PARTITION BY p + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS val < 5, B AS val >= 5 +);'); + rpr_explain_filter +-------------------------------------------------------------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (PARTITION BY (CASE WHEN (v.v <= 25) THEN 1 ELSE 2 END) ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 77 total, 0 merged + NFA Contexts: 2 peak, 52 total, 21 pruned + NFA: 5 matched (len 5/6/5.8), 0 mismatched + NFA: 19 absorbed (len 1/1/1.0), 5 skipped (len 1/1/1.0) + -> Sort (actual rows=50.00 loops=1) + Sort Key: (CASE WHEN (v.v <= 25) THEN 1 ELSE 2 END) + Sort Method: quicksort Memory: 26kB + -> Function Scan on generate_series v (actual rows=50.00 loops=1) +(12 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- Edge Cases +-- ============================================================ +-- Empty result set +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 0) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v = 1, B AS v = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------ + PATTERN (a b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 0) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v = 1, B AS v 
= 2 +);'); + rpr_explain_filter +--------------------------------------------------------------------- + WindowAgg (actual rows=0.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b + -> Function Scan on generate_series s (actual rows=0.00 loops=1) +(4 rows) + +DROP VIEW rpr_v; +-- Single row +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 1) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A) + DEFINE A AS TRUE +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------- + PATTERN (a) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 1) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A) + DEFINE A AS TRUE +);'); + rpr_explain_filter +--------------------------------------------------------------------- + WindowAgg (actual rows=1.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 2 total, 0 merged + NFA Contexts: 2 peak, 2 total, 0 pruned + NFA: 1 matched (len 1/1/1.0), 0 mismatched + -> Function Scan on generate_series s (actual rows=1.00 loops=1) +(8 rows) + +DROP VIEW rpr_v; +-- Pattern longer than data +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 5) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E F G H I J) + DEFINE + A AS v = 1, B AS v = 2, C AS v = 3, D AS v = 4, E AS v = 5, + F AS v = 6, G AS v = 7, H AS v = 8, I AS v = 9, J AS v = 10 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line 
+---------------------------------- + PATTERN (a b c d e f g h i j) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 5) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E F G H I J) + DEFINE + A AS v = 1, B AS v = 2, C AS v = 3, D AS v = 4, E AS v = 5, + F AS v = 6, G AS v = 7, H AS v = 8, I AS v = 9, J AS v = 10 +);'); + rpr_explain_filter +--------------------------------------------------------------------- + WindowAgg (actual rows=5.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c d e f g h i j + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 6 total, 0 merged + NFA Contexts: 3 peak, 6 total, 4 pruned + NFA: 0 matched, 1 mismatched (len 5/5/5.0) + -> Function Scan on generate_series s (actual rows=5.00 loops=1) +(8 rows) + +DROP VIEW rpr_v; +-- All rows match as single match +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS TRUE +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------- + PATTERN (a+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS TRUE +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 101 total, 
0 merged + NFA Contexts: 2 peak, 51 total, 0 pruned + NFA: 1 matched (len 50/50/50.0), 0 mismatched + NFA: 49 absorbed (len 1/1/1.0), 0 skipped + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- Complex Pattern Tests +-- ============================================================ +-- Nested groups +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B) C)+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------- + PATTERN (((a b) c)+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B) C)+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b' c')+" + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 81 total, 0 merged + NFA Contexts: 3 peak, 61 total, 20 pruned + NFA: 1 matched (len 60/60/60.0), 0 mismatched + NFA: 19 absorbed (len 1/1/1.0), 20 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Multiple alternations +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (C | D | E)) + DEFINE + A AS cat = 
'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------------------- + PATTERN ((a | b) (c | d | e)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (C | D | E)) + DEFINE + A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', + D AS cat = ''D'', E AS cat = ''E'' +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b) (c | d | e) + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 282 total, 0 merged + NFA Contexts: 3 peak, 101 total, 40 pruned + NFA: 20 matched (len 2/2/2.0), 20 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) + -> Seq Scan on nfa_test (actual rows=100.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Optional elements +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B? C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a b? c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B? 
C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b? c + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 64 total, 0 merged + NFA Contexts: 3 peak, 51 total, 25 pruned + NFA: 12 matched (len 3/3/3.0), 1 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 12 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Bounded quantifiers +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,5} B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN (a{2,5} b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,5} B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2,5} b + Storage: Memory Maximum Storage: NkB + NFA States: 9 peak, 311 total, 0 merged + NFA Contexts: 7 peak, 101 total, 0 pruned + NFA: 10 matched (len 6/6/6.0), 40 mismatched (len 6/6/6.0) + NFA: 0 absorbed, 50 skipped (len 1/5/3.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Star quantifier +CREATE TEMP VIEW rpr_v AS +SELECT 
count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B* C) + DEFINE A AS v % 10 = 1, B AS v % 10 IN (2,3,4,5,6,7,8), C AS v % 10 = 9 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a b* c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B* C) + DEFINE A AS v % 10 = 1, B AS v % 10 IN (2,3,4,5,6,7,8), C AS v % 10 = 9 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b* c + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 91 total, 0 merged + NFA Contexts: 3 peak, 51 total, 40 pruned + NFA: 5 matched (len 9/9/9.0), 0 mismatched + NFA: 0 absorbed, 5 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- Real-world Pattern Examples +-- ============================================================ +-- Stock price pattern - V-shape (down then up) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_complex +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (D+ U+) + DEFINE D AS trend = 'D', U AS trend = 'U' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (d+ u+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY 
OFF) +SELECT count(*) OVER w +FROM nfa_complex +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (D+ U+) + DEFINE D AS trend = ''D'', U AS trend = ''U'' +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: d+" u+ + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 58 total, 0 merged + NFA Contexts: 3 peak, 31 total, 3 pruned + NFA: 3 matched (len 3/14/8.0), 1 mismatched (len 3/3/3.0) + NFA: 9 absorbed (len 1/1/1.0), 14 skipped (len 1/1/1.0) + -> Seq Scan on nfa_complex (actual rows=30.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Stock price pattern - peak (up, stable, down) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_complex +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (U+ S* D+) + DEFINE U AS trend = 'U', S AS trend = 'S', D AS trend = 'D' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN (u+ s* d+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_complex +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (U+ S* D+) + DEFINE U AS trend = ''U'', S AS trend = ''S'', D AS trend = ''D'' +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: u+" s* d+ + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 76 total, 0 merged + NFA Contexts: 3 peak, 31 total, 1 pruned + NFA: 4 matched (len 3/11/7.2), 0 mismatched + NFA: 12 absorbed (len 1/1/1.0), 13 
skipped (len 1/1/1.0) + -> Seq Scan on nfa_complex (actual rows=30.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Consecutive increasing values (using PREV) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3,}) + DEFINE A AS v > PREV(v) OR PREV(v) IS NULL +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (a{3,}) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3,}) + DEFINE A AS v > PREV(v) OR PREV(v) IS NULL +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{3,}" + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 99 total, 0 merged + NFA Contexts: 2 peak, 51 total, 0 pruned + NFA: 1 matched (len 50/50/50.0), 0 mismatched + NFA: 49 absorbed (len 1/1/1.0), 0 skipped + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- Performance-oriented Tests +-- ============================================================ +-- Large dataset with simple pattern +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line 
+------------------ + PATTERN (a b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); + rpr_explain_filter +------------------------------------------------------------------------ + WindowAgg (actual rows=1000.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 1001 total, 0 merged + NFA Contexts: 2 peak, 1001 total, 0 pruned + NFA: 500 matched (len 2/2/2.0), 0 mismatched + NFA: 0 absorbed, 500 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=1000.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Large dataset with absorption +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 100 <> 0, B AS v % 100 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 100 <> 0, B AS v % 100 = 0 +);'); + rpr_explain_filter +------------------------------------------------------------------------ + WindowAgg (actual rows=1000.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 1991 total, 0 merged + NFA 
Contexts: 2 peak, 1001 total, 0 pruned + NFA: 10 matched (len 100/100/100.0), 0 mismatched + NFA: 980 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=1000.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- High state merge ratio +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------- + PATTERN ((a | b)+ c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=500.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b)+ c + Storage: Memory Maximum Storage: NkB + NFA States: 8 peak, 2004 total, 0 merged + NFA Contexts: 3 peak, 501 total, 1 pruned + NFA: 166 matched (len 3/3/3.0), 1 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 332 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=500.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- INITIAL vs no INITIAL comparison +-- ============================================================ +-- With INITIAL keyword +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW 
+ INITIAL + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 91 total, 0 merged + NFA Contexts: 2 peak, 51 total, 0 pruned + NFA: 10 matched (len 5/5/5.0), 0 mismatched + NFA: 30 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Without INITIAL keyword (same behavior currently) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg 
(actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 91 total, 0 merged + NFA Contexts: 2 peak, 51 total, 0 pruned + NFA: 10 matched (len 5/5/5.0), 0 mismatched + NFA: 30 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- Quantifier Variations +-- ============================================================ +-- Plus quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS v % 4 <> 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------- + PATTERN (a+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS v % 4 <> 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 71 total, 0 merged + NFA Contexts: 3 peak, 41 total, 10 pruned + NFA: 10 matched (len 3/3/3.0), 0 mismatched + NFA: 20 absorbed (len 1/1/1.0), 0 skipped + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Star quantifier (zero or more) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW 
AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A* B) + DEFINE A AS v % 4 IN (1, 2), B AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a* b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A* B) + DEFINE A AS v % 4 IN (1, 2), B AS v % 4 = 3 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a*" b + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 102 total, 0 merged + NFA Contexts: 2 peak, 41 total, 10 pruned + NFA: 10 matched (len 3/3/3.0), 0 mismatched + NFA: 20 absorbed (len 1/1/1.0), 0 skipped + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Question mark (zero or one) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A? B C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a? b c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A? 
B C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a? b c + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 82 total, 0 merged + NFA Contexts: 3 peak, 41 total, 10 pruned + NFA: 10 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 20 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Exact count {n} +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a{3} b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{3} b + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 51 total, 0 merged + NFA Contexts: 5 peak, 51 total, 0 pruned + NFA: 10 matched (len 4/4/4.0), 10 mismatched (len 4/4/4.0) + NFA: 0 absorbed, 30 skipped (len 1/3/2.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Range {n,m} +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS 
s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN (a{2,4} b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2,4} b + Storage: Memory Maximum Storage: NkB + NFA States: 7 peak, 101 total, 0 merged + NFA Contexts: 5 peak, 51 total, 0 pruned + NFA: 10 matched (len 5/5/5.0), 0 mismatched + NFA: 0 absorbed, 40 skipped (len 1/4/2.5) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- At least {n,} +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3,} B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------- + PATTERN (a{3,} b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3,} B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +);'); + rpr_explain_filter 
+---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{3,}" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 86 total, 0 merged + NFA Contexts: 2 peak, 51 total, 0 pruned + NFA: 5 matched (len 10/10/10.0), 0 mismatched + NFA: 40 absorbed (len 1/1/1.0), 5 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- Regression Tests for Statistics Accuracy +-- ============================================================ +-- Verify state count accuracy +-- Pattern A+ B with 20 rows should show predictable state behavior +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=20.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 37 total, 0 merged + NFA Contexts: 2 peak, 21 total, 0 pruned + NFA: 4 matched (len 5/5/5.0), 0 mismatched + NFA: 12 absorbed (len 1/1/1.0), 4 skipped (len 1/1/1.0) + -> Function Scan on 
generate_series s (actual rows=20.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Verify context count with known absorption +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C) + DEFINE A AS v % 10 IN (1,2,3,4,5,6,7), B AS v % 10 = 8, C AS v % 10 = 9 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a+ b c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C) + DEFINE A AS v % 10 IN (1,2,3,4,5,6,7), B AS v % 10 = 8, C AS v % 10 = 9 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b c + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 52 total, 0 merged + NFA Contexts: 3 peak, 31 total, 6 pruned + NFA: 3 matched (len 9/9/9.0), 0 mismatched + NFA: 18 absorbed (len 1/1/1.0), 3 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=30.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Verify match length with fixed-length pattern +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (a b c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, 
BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 31 total, 0 merged + NFA Contexts: 3 peak, 31 total, 10 pruned + NFA: 10 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=30.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- Alternation Pattern Tests +-- ============================================================ +-- Simple alternation +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) C) + DEFINE A AS cat = 'A', B AS cat = 'B', C AS cat = 'C' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------ + PATTERN ((a | b) c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) C) + DEFINE A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'' +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b) c + Storage: Memory Maximum Storage: 
NkB + NFA States: 3 peak, 202 total, 0 merged + NFA Contexts: 3 peak, 101 total, 40 pruned + NFA: 20 matched (len 2/2/2.0), 20 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) + -> Seq Scan on nfa_test (actual rows=100.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Multiple items in alternation +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C | D) E) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------- + PATTERN ((a | b | c | d) e) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C | D) E) + DEFINE + A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', + D AS cat = ''D'', E AS cat = ''E'' +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b | c | d) e + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 404 total, 0 merged + NFA Contexts: 3 peak, 101 total, 0 pruned + NFA: 20 matched (len 2/2/2.0), 60 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) + -> Seq Scan on nfa_test (actual rows=100.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Alternation with quantifiers +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v 
% 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------- + PATTERN ((a | b)+ c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b)+ c + Storage: Memory Maximum Storage: NkB + NFA States: 8 peak, 204 total, 0 merged + NFA Contexts: 3 peak, 51 total, 1 pruned + NFA: 16 matched (len 3/3/3.0), 1 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 32 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Multiple alternatives (4+) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A | B | C | D | E) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------- + PATTERN (a | b | c | d | e) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A | B | C | D | E) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +);'); + rpr_explain_filter 
+----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b | c | d | e) + Storage: Memory Maximum Storage: NkB + NFA States: 6 peak, 505 total, 0 merged + NFA Contexts: 2 peak, 101 total, 0 pruned + NFA: 100 matched (len 1/1/1.0), 0 mismatched + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(8 rows) + +DROP VIEW rpr_v; +-- Alternation at start +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN ((a | b) c d) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b) c d + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 122 total, 0 merged + NFA Contexts: 3 peak, 61 total, 16 pruned + NFA: 15 matched (len 3/3/3.0), 14 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 15 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Multiple sequential alternations +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED 
FOLLOWING + PATTERN ((A | B) C (D | E) F) + DEFINE A AS v % 6 = 0, B AS v % 6 = 1, C AS v % 6 = 2, D AS v % 6 = 3, E AS v % 6 = 4, F AS v % 6 = 5 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------------------- + PATTERN ((a | b) c (d | e) f) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C (D | E) F) + DEFINE A AS v % 6 = 0, B AS v % 6 = 1, C AS v % 6 = 2, D AS v % 6 = 3, E AS v % 6 = 4, F AS v % 6 = 5 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b) c (d | e) f + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 219 total, 0 merged + NFA Contexts: 3 peak, 101 total, 67 pruned + NFA: 0 matched, 33 mismatched (len 2/4/3.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(8 rows) + +DROP VIEW rpr_v; +-- Quantified alternatives +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+ | B+) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN ((a+ | b+) c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+ | B+) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); + rpr_explain_filter 
+---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a+" | b+") c + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 162 total, 0 merged + NFA Contexts: 3 peak, 61 total, 1 pruned + NFA: 20 matched (len 2/2/2.0), 19 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Alternation at end +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN (a b (c | d)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b (c | d) + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 75 total, 0 merged + NFA Contexts: 3 peak, 61 total, 32 pruned + NFA: 14 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 14 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Nested ALT at start of branch inside outer ALT +-- Pattern: (A ((B | C) D | E)) - preceding VAR + inner ALT as first branch element +CREATE TEMP VIEW 
rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) D | E)) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------- + PATTERN (a ((b | c) d | e)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) D | E)) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=20.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a ((b | c) d | e) + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 29 total, 0 merged + NFA Contexts: 3 peak, 21 total, 17 pruned + NFA: 0 matched, 3 mismatched (len 3/3/3.0) + -> Function Scan on generate_series s (actual rows=20.00 loops=1) +(8 rows) + +DROP VIEW rpr_v; +-- Nested ALT at end of branch inside outer ALT +-- Pattern: (C (A | B) | D) - inner ALT is last element in outer branch +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (C (A | B) | D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------------- + PATTERN (c (a | b) | d) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) 
AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (C (A | B) | D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=20.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (c (a | b) | d) + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 47 total, 0 merged + NFA Contexts: 3 peak, 21 total, 10 pruned + NFA: 5 matched (len 1/1/1.0), 5 mismatched (len 2/2/2.0) + -> Function Scan on generate_series s (actual rows=20.00 loops=1) +(8 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- Group Pattern Tests +-- ============================================================ +-- Simple group +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN ((a b)+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b')+" + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 61 total, 0 merged + NFA Contexts: 2 peak, 41 total, 0 pruned + NFA: 1 matched (len 40/40/40.0), 0 mismatched + NFA: 19 
absorbed (len 1/1/1.0), 20 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Group with bounded quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B){2,4}) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------- + PATTERN ((a b){2,4}) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B){2,4}) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b){2,4} + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 51 total, 0 merged + NFA Contexts: 3 peak, 41 total, 5 pruned + NFA: 5 matched (len 8/8/8.0), 0 mismatched + NFA: 0 absorbed, 30 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Nested groups +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B){2})+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN (((a b){2})+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, 
SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B){2})+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a b){2})+ + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 76 total, 0 merged + NFA Contexts: 4 peak, 61 total, 15 pruned + NFA: 1 matched (len 60/60/60.0), 0 mismatched + NFA: 0 absorbed, 44 skipped (len 1/4/2.3) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Deep nesting (3+ levels) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((((A | B)+)+)+) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------------- + PATTERN ((((a | b)+)+)+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((((A | B)+)+)+) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b)+ + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 162 total, 0 merged + NFA Contexts: 2 peak, 41 total, 0 pruned + NFA: 1 matched (len 40/40/40.0), 0 mismatched + NFA: 0 absorbed, 39 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual 
rows=40.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Bounded quantifier on alternation +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B){2,3} C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------------- + PATTERN ((a | b){2,3} c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B){2,3} C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b){2,3} c + Storage: Memory Maximum Storage: NkB + NFA States: 7 peak, 200 total, 0 merged + NFA Contexts: 3 peak, 61 total, 2 pruned + NFA: 19 matched (len 3/3/3.0), 1 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 38 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Nested groups with quantifiers +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B)+ C)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN (((a b)+ c)*) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + 
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B)+ C)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a' b')+" c)* + Storage: Memory Maximum Storage: NkB + NFA States: 7 peak, 178 total, 0 merged + NFA Contexts: 4 peak, 61 total, 22 pruned + NFA: 1 matched (len 57/57/57.0), 0 mismatched + NFA: 0 absorbed, 37 skipped (len 1/3/2.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Partial nested quantification +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A (B C)+)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN ((a (b c)+)*) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A (B C)+)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a (b c)+)* + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 160 total, 0 merged + NFA Contexts: 4 peak, 61 total, 22 pruned + NFA: 1 matched (len 57/57/57.0), 0 mismatched + NFA: 0 absorbed, 37 skipped (len 1/3/2.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- 
============================================================ +-- Window Function Combinations +-- ============================================================ +-- count(*) with pattern +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 55 total, 0 merged + NFA Contexts: 2 peak, 31 total, 0 pruned + NFA: 6 matched (len 5/5/5.0), 0 mismatched + NFA: 18 absorbed (len 1/1/1.0), 6 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=30.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- first_value with pattern +CREATE TEMP VIEW rpr_v AS +SELECT first_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY 
OFF) +SELECT first_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 55 total, 0 merged + NFA Contexts: 2 peak, 31 total, 0 pruned + NFA: 6 matched (len 5/5/5.0), 0 mismatched + NFA: 18 absorbed (len 1/1/1.0), 6 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=30.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- last_value with pattern +CREATE TEMP VIEW rpr_v AS +SELECT last_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT last_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 55 total, 0 merged + NFA Contexts: 2 peak, 31 total, 0 pruned + NFA: 6 matched (len 5/5/5.0), 0 mismatched + NFA: 18 absorbed (len 1/1/1.0), 6 skipped (len 1/1/1.0) + -> 
Function Scan on generate_series s (actual rows=30.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Multiple window functions +CREATE TEMP VIEW rpr_v AS +SELECT + count(*) OVER w, + first_value(v) OVER w, + last_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT + count(*) OVER w, + first_value(v) OVER w, + last_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 55 total, 0 merged + NFA Contexts: 2 peak, 31 total, 0 pruned + NFA: 6 matched (len 5/5/5.0), 0 mismatched + NFA: 18 absorbed (len 1/1/1.0), 6 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=30.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- DEFINE Expression Variations +-- ============================================================ +-- Complex boolean expressions +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS (v % 5 <> 0) AND (v % 3 <> 0), + B AS (v % 5 = 0) OR (v % 3 = 0) +); +SELECT line FROM 
unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS (v % 5 <> 0) AND (v % 3 <> 0), + B AS (v % 5 = 0) OR (v % 3 = 0) +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 78 total, 0 merged + NFA Contexts: 2 peak, 51 total, 6 pruned + NFA: 17 matched (len 2/3/2.6), 0 mismatched + NFA: 10 absorbed (len 1/1/1.0), 17 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Using PREV function +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (S U+ D+) + DEFINE + S AS TRUE, + U AS v > PREV(v), + D AS v < PREV(v) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------- + PATTERN (s u+ d+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (S U+ D+) + DEFINE + S AS TRUE, + U AS v > PREV(v), + D AS v < PREV(v) +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: 
w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: s u+ d+ + Storage: Memory Maximum Storage: NkB + NFA States: 60 peak, 466 total, 0 merged + NFA Contexts: 31 peak, 31 total, 1 pruned + NFA: 0 matched, 29 mismatched (len 2/30/16.0) + -> Function Scan on generate_series s (actual rows=30.00 loops=1) +(8 rows) + +DROP VIEW rpr_v; +-- Using NULL comparisons +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT CASE WHEN v % 5 = 0 THEN NULL ELSE v END AS v + FROM generate_series(1, 30) v +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v IS NOT NULL, B AS v IS NULL +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM ( + SELECT CASE WHEN v % 5 = 0 THEN NULL ELSE v END AS v + FROM generate_series(1, 30) v +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v IS NOT NULL, B AS v IS NULL +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 55 total, 0 merged + NFA Contexts: 2 peak, 31 total, 0 pruned + NFA: 6 matched (len 5/5/5.0), 0 mismatched + NFA: 18 absorbed (len 1/1/1.0), 6 skipped (len 1/1/1.0) + -> Function Scan on generate_series v (actual rows=30.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- Large Scale Statistics Verification +-- ============================================================ +-- 500 rows - verify statistics scale correctly 
+CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C) + DEFINE A AS v % 10 < 7, B AS v % 10 = 7, C AS v % 10 = 8 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a+ b c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C) + DEFINE A AS v % 10 < 7, B AS v % 10 = 7, C AS v % 10 = 8 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=500.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b c + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 851 total, 0 merged + NFA Contexts: 3 peak, 501 total, 101 pruned + NFA: 50 matched (len 8/9/9.0), 0 mismatched + NFA: 299 absorbed (len 1/1/1.0), 50 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=500.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- High match count scenario +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------ + PATTERN (a b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + 
AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=500.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 501 total, 0 merged + NFA Contexts: 2 peak, 501 total, 0 pruned + NFA: 250 matched (len 2/2/2.0), 0 mismatched + NFA: 0 absorbed, 250 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=500.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- High skip count scenario +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS v % 100 = 1, + B AS v % 100 = 2, + C AS v % 100 = 3, + D AS v % 100 = 4, + E AS v % 100 = 5 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------ + PATTERN (a b c d e) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS v % 100 = 1, + B AS v % 100 = 2, + C AS v % 100 = 3, + D AS v % 100 = 4, + E AS v % 100 = 5 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=500.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c d e + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 501 total, 0 merged + NFA Contexts: 3 peak, 501 total, 490 pruned + NFA: 5 matched (len 5/5/5.0), 0 mismatched + NFA: 0 absorbed, 5 skipped (len 1/1/1.0) + -> Function Scan 
on generate_series s (actual rows=500.00 loops=1) +(9 rows) + +DROP VIEW rpr_v; +-- Cleanup +DROP TABLE nfa_test; +DROP TABLE nfa_complex; diff --git a/src/test/regress/expected/rpr_nfa.out b/src/test/regress/expected/rpr_nfa.out new file mode 100644 index 00000000000..46a463c2597 --- /dev/null +++ b/src/test/regress/expected/rpr_nfa.out @@ -0,0 +1,2524 @@ +-- ============================================================ +-- RPR NFA Tests +-- Tests for Row Pattern Recognition NFA Runtime Execution +-- ============================================================ +-- +-- This test suite validates the NFA (Non-deterministic Finite +-- Automaton) runtime execution engine in nodeWindowAgg.c, +-- focusing on update_reduced_frame and related functions. +-- +-- Test Strategy: +-- Diagonal pattern style using ARRAY flags to explicitly +-- control which pattern variables match at each row. +-- +-- Test Coverage: +-- Basic NFA Flow (match->absorb->advance) +-- Absorption Optimization +-- Context Lifecycle Management +-- Advance Phase (Epsilon Transitions) +-- Match Phase (Variable Matching) +-- Frame Boundary Handling +-- State Management (Deduplication) +-- Statistics and Diagnostics +-- Quantifier Runtime Behavior +-- Pathological Pattern Protection +-- Alternation Runtime Behavior +-- Deep Nested Groups +-- SKIP Options (Runtime) +-- INITIAL Mode (Runtime) +-- Frame Boundary Variations +-- Special Partition Cases +-- DEFINE Special Cases +-- Absorption Dynamic Flags +-- FIXME Issues (Known Limitations) +-- +-- Responsibility: +-- - NFA runtime execution paths +-- - Context/State lifecycle management +-- - Runtime boundary conditions and protections +-- +-- NOT tested here (covered in other files): +-- - Pattern parsing/optimization (rpr_base.sql) +-- - EXPLAIN output (rpr_explain.sql) +-- - PREV/NEXT semantics (rpr.sql) +-- ============================================================ +-- ============================================================ +-- Basic NFA Flow +-- 
============================================================ +-- Simple sequential pattern +WITH test_sequential AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['_']) -- No match + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_sequential +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {_} | | +(5 rows) + +-- Quantified pattern (A+ B+ C+) +WITH test_quantified AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['C']), + (8, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_quantified +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B+ C+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 7 + 2 | {A} | 2 | 7 + 3 | {A} | 3 | 7 + 4 | {B} | | + 5 | {B} | | + 6 | {C} | | + 7 | {C} | | + 8 | {_} | | +(8 rows) + +-- Optional pattern (A B? 
C) +WITH test_optional AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), -- B skipped + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']), -- B matched + (6, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_optional +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B? C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {C} | | + 3 | {A} | 3 | 5 + 4 | {B} | | + 5 | {C} | | + 6 | {_} | | +(6 rows) + +-- Alternation pattern (A (B|C) D) +WITH test_alternation AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), -- First branch + (3, ARRAY['D']), + (4, ARRAY['A']), + (5, ARRAY['C']), -- Second branch + (6, ARRAY['D']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alternation +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A (B | C) D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {D} | | + 4 | {A} | 4 | 6 + 5 | {C} | | + 6 | {D} | | + 7 | {_} | | +(7 rows) + +-- ============================================================ +-- Absorption Optimization +-- ============================================================ +-- Absorbable pattern (A+) +WITH test_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + 
last_value(id) OVER w AS match_end +FROM test_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {A} | 4 | 4 + 5 | {_} | | +(5 rows) + +-- Mixed absorbable/non-absorbable ((A+) | B) +WITH test_mixed_absorption AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_mixed_absorption +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+) | B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {B} | 4 | 4 + 5 | {_} | | +(5 rows) + +-- State coverage (same elemIdx, different count) +WITH test_state_coverage AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_state_coverage +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{2,} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | | + 4 | {B} | | + 5 | {_} | | +(5 rows) + +-- ============================================================ +-- Context Lifecycle +-- ============================================================ +-- Multiple overlapping 
contexts (SKIP TO NEXT ROW) +WITH test_overlapping_contexts AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_overlapping_contexts +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | + 5 | {_} | | +(5 rows) + +-- Failed context cleanup (early failure) +WITH test_context_cleanup AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Pruned at first row + (2, ARRAY['A']), + (3, ARRAY['_']), -- Mismatched after row 2 + (4, ARRAY['A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_context_cleanup +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {_} | | + 2 | {A} | | + 3 | {_} | | + 4 | {A} | 4 | 5 + 5 | {B} | | +(5 rows) + +-- Partition end (incomplete contexts) +WITH test_partition_end AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']) + -- Pattern requires B, but partition ends + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_partition_end +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | 
match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {A} | | +(3 rows) + +-- Completed context encountered during processing +-- Pattern (A | B C D): Ctx1 takes long B->C->D path, while Ctx2 takes +-- short A path and completes first. Next row sees Ctx2 +-- with states=NULL and skips it. +WITH test_completed_ctx AS ( + SELECT * FROM (VALUES + (1, ARRAY['B', '_']), + (2, ARRAY['C', 'A']), + (3, ARRAY['D', '_']), + (4, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_completed_ctx +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A | B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B,_} | 1 | 3 + 2 | {C,A} | 2 | 2 + 3 | {D,_} | | + 4 | {_,_} | | +(4 rows) + +-- ============================================================ +-- Advance Phase (Epsilon Transitions) +-- ============================================================ +-- Nested groups ((A B)+) +WITH test_nested_groups AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_groups +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {A} | 3 | 6 + 4 | {B} | | + 5 | {A} | 5 | 6 + 6 | {B} | | + 7 | {_} | | +(7 rows) + +-- Multiple alternation branches (A (B|C|D) E) +WITH 
test_multi_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['E']), + (4, ARRAY['A']), + (5, ARRAY['C']), + (6, ARRAY['E']), + (7, ARRAY['A']), + (8, ARRAY['D']), + (9, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_multi_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A (B | C | D) E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {E} | | + 4 | {A} | 4 | 6 + 5 | {C} | | + 6 | {E} | | + 7 | {A} | 7 | 9 + 8 | {D} | | + 9 | {E} | | +(9 rows) + +-- Optional VAR at start (A? B C) +WITH test_optional_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), -- A skipped + (2, ARRAY['C']), + (3, ARRAY['A']), -- A matched + (4, ARRAY['B']), + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_optional_var +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A? 
B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | 1 | 2 + 2 | {C} | | + 3 | {A} | 3 | 5 + 4 | {B} | 4 | 5 + 5 | {C} | | +(5 rows) + +-- Nested alternation ((A|B) (C|D)) +WITH test_nested_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), -- A C + (3, ARRAY['A']), + (4, ARRAY['D']), -- A D + (5, ARRAY['B']), + (6, ARRAY['C']), -- B C + (7, ARRAY['B']), + (8, ARRAY['D']) -- B D + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B) (C | D)) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {C} | | + 3 | {A} | 3 | 4 + 4 | {D} | | + 5 | {B} | 5 | 6 + 6 | {C} | | + 7 | {B} | 7 | 8 + 8 | {D} | | +(8 rows) + +-- ============================================================ +-- Match Phase +-- ============================================================ +-- Simple VAR with END next (A B C all min=max=1) +WITH test_simple_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_simple_var +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {C} | | + 4 | {_} | | +(4 rows) + +-- VAR max exceeded 
(A{2,3}) +WITH test_max_exceeded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), -- Max = 3 + (4, ARRAY['A']), -- Exceeds max, state removed + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_max_exceeded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{2,3} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | 2 | 5 + 3 | {A} | 3 | 5 + 4 | {A} | | + 5 | {B} | | +(5 rows) + +-- Non-matching VAR (DEFINE false) +WITH test_non_matching AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['_']), -- B not matched (DEFINE false) + (3, ARRAY['A']), + (4, ARRAY['B']), -- B matched + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_non_matching +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {_} | | + 3 | {A} | 3 | 5 + 4 | {B} | | + 5 | {C} | | +(5 rows) + +-- ============================================================ +-- Frame Boundary Handling +-- ============================================================ +-- Limited frame (ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING) +WITH test_limited_frame AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), -- Within 3 FOLLOWING + (5, ARRAY['B']), -- Beyond 3 FOLLOWING from row 1 + (6, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) 
OVER w AS match_end +FROM test_limited_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | + 5 | {B} | | + 6 | {_} | | +(6 rows) + +-- Unbounded frame (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +WITH test_unbounded_frame AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['B']) -- Far from start, but unbounded + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_unbounded_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {A} | 2 | 6 + 3 | {A} | 3 | 6 + 4 | {A} | 4 | 6 + 5 | {A} | 5 | 6 + 6 | {B} | | +(6 rows) + +-- Match exceeds frame boundary +WITH test_frame_exceeded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']) + -- Frame ends at row 3 (2 FOLLOWING), B never appears + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_frame_exceeded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {A} | | +(3 rows) + +-- Frame boundary forced mismatch +-- Limited frame with enough rows so that a context's frame boundary +-- is 
exceeded while still processing. +WITH test_frame_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_frame_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {A} | | + 4 | {A} | 4 | 6 + 5 | {A} | 5 | 6 + 6 | {B} | | +(6 rows) + +-- ============================================================ +-- State Management +-- ============================================================ +-- Duplicate state creation +WITH test_duplicate_states AS ( + SELECT * FROM (VALUES + (1, ARRAY['A', 'B']), -- Both A and B match (creates duplicate states via different paths) + (2, ARRAY['C', '_']), + (3, ARRAY['D', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_duplicate_states +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B) C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 3 + 2 | {C,_} | | + 3 | {D,_} | | +(3 rows) + +-- Large pattern (stress free list) +WITH test_large_pattern AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']), + (7, ARRAY['G']), + (8, ARRAY['H']), + (9, ARRAY['I']), + (10, ARRAY['J']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + 
last_value(id) OVER w AS match_end +FROM test_large_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D E F G H I J) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags), + G AS 'G' = ANY(flags), + H AS 'H' = ANY(flags), + I AS 'I' = ANY(flags), + J AS 'J' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 10 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {E} | | + 6 | {F} | | + 7 | {G} | | + 8 | {H} | | + 9 | {I} | | + 10 | {J} | | +(10 rows) + +-- Reduced frame map reallocation (> 1024 rows) +WITH test_map_realloc AS ( + SELECT id, CASE WHEN id % 2 = 1 THEN ARRAY['A'] ELSE ARRAY['B'] END AS flags + FROM generate_series(1, 1100) AS id +) +SELECT count(*), min(match_start), max(match_end) +FROM ( + SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end + FROM test_map_realloc + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) + ) +) sub; + count | min | max +-------+-----+------ + 1100 | 1 | 1100 +(1 row) + +-- ============================================================ +-- Statistics and Diagnostics +-- ============================================================ +-- Matched contexts +WITH test_matched AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_matched +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | 
match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {B} | | + 3 | {A} | 3 | 4 + 4 | {B} | | +(4 rows) + +-- Pruned contexts (failed at first row) +WITH test_pruned AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Pruned + (2, ARRAY['_']), -- Pruned + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_pruned +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {_} | | + 2 | {_} | | + 3 | {A} | 3 | 4 + 4 | {B} | | +(4 rows) + +-- Mismatched contexts (failed after multiple rows) +WITH test_mismatched AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['_']), -- Mismatched after 2 rows + (4, ARRAY['A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_mismatched +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {_} | | + 4 | {A} | 4 | 5 + 5 | {B} | | +(5 rows) + +-- Absorbed contexts +WITH test_absorbed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorbed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | 
flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {A} | 4 | 4 + 5 | {_} | | +(5 rows) + +-- Skipped contexts (SKIP TO NEXT ROW) +WITH test_skipped AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) -- Completes match starting at row 1 + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skipped +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | +(4 rows) + +-- ============================================================ +-- Quantifier Runtime Behavior +-- ============================================================ +-- Large count handling (A{100}) +WITH test_large_count AS ( + SELECT i AS id, ARRAY['A'] AS flags + FROM generate_series(1, 105) i +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_large_count +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{100}) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +-----+-------+-------------+----------- + 1 | {A} | 1 | 100 + 2 | {A} | 2 | 101 + 3 | {A} | 3 | 102 + 4 | {A} | 4 | 103 + 5 | {A} | 5 | 104 + 6 | {A} | 6 | 105 + 7 | {A} | | + 8 | {A} | | + 9 | {A} | | + 10 | {A} | | + 11 | {A} | | + 12 | {A} | | + 13 | {A} | | + 14 | {A} | | + 15 | {A} | | + 16 | {A} | | + 17 | {A} | | + 18 | {A} | | + 19 | {A} | | + 20 | {A} | | + 21 | {A} | | + 22 | {A} | | + 23 | {A} | | + 24 | {A} | | + 25 | {A} | | + 26 | {A} | | + 27 | {A} | | + 28 | {A} | | + 29 | {A} | | + 30 | {A} | | 
+ 31 | {A} | | + 32 | {A} | | + 33 | {A} | | + 34 | {A} | | + 35 | {A} | | + 36 | {A} | | + 37 | {A} | | + 38 | {A} | | + 39 | {A} | | + 40 | {A} | | + 41 | {A} | | + 42 | {A} | | + 43 | {A} | | + 44 | {A} | | + 45 | {A} | | + 46 | {A} | | + 47 | {A} | | + 48 | {A} | | + 49 | {A} | | + 50 | {A} | | + 51 | {A} | | + 52 | {A} | | + 53 | {A} | | + 54 | {A} | | + 55 | {A} | | + 56 | {A} | | + 57 | {A} | | + 58 | {A} | | + 59 | {A} | | + 60 | {A} | | + 61 | {A} | | + 62 | {A} | | + 63 | {A} | | + 64 | {A} | | + 65 | {A} | | + 66 | {A} | | + 67 | {A} | | + 68 | {A} | | + 69 | {A} | | + 70 | {A} | | + 71 | {A} | | + 72 | {A} | | + 73 | {A} | | + 74 | {A} | | + 75 | {A} | | + 76 | {A} | | + 77 | {A} | | + 78 | {A} | | + 79 | {A} | | + 80 | {A} | | + 81 | {A} | | + 82 | {A} | | + 83 | {A} | | + 84 | {A} | | + 85 | {A} | | + 86 | {A} | | + 87 | {A} | | + 88 | {A} | | + 89 | {A} | | + 90 | {A} | | + 91 | {A} | | + 92 | {A} | | + 93 | {A} | | + 94 | {A} | | + 95 | {A} | | + 96 | {A} | | + 97 | {A} | | + 98 | {A} | | + 99 | {A} | | + 100 | {A} | | + 101 | {A} | | + 102 | {A} | | + 103 | {A} | | + 104 | {A} | | + 105 | {A} | | +(105 rows) + +-- Unlimited quantifier (A{10,}) +WITH test_unlimited AS ( + SELECT i AS id, ARRAY['A'] AS flags + FROM generate_series(1, 15) i + UNION ALL + SELECT 16, ARRAY['B'] +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_unlimited +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{10,} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 16 + 2 | {A} | 2 | 16 + 3 | {A} | 3 | 16 + 4 | {A} | 4 | 16 + 5 | {A} | 5 | 16 + 6 | {A} | 6 | 16 + 7 | {A} | | + 8 | {A} | | + 9 | {A} | | + 10 | {A} | | + 11 | {A} | | + 12 | {A} | | + 13 | {A} | | + 14 | {A} | | + 15 | {A} | | + 16 | {B} | | +(16 rows) + +-- Min boundary 
(A{3,5}) +WITH test_min_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), -- Min=3 reached, exit path available + (4, ARRAY['B']), -- Match ends at min + (5, ARRAY['A']), + (6, ARRAY['A']), + (7, ARRAY['A']), + (8, ARRAY['A']), + (9, ARRAY['A']), -- Count=5, max reached + (10, ARRAY['B']) -- Match ends at max + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_min_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{3,5} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | | + 3 | {A} | | + 4 | {B} | | + 5 | {A} | 5 | 10 + 6 | {A} | 6 | 10 + 7 | {A} | 7 | 10 + 8 | {A} | | + 9 | {A} | | + 10 | {B} | | +(10 rows) + +-- Max boundary exceeded (A{3,5}) +WITH test_max_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['A']), -- Count=6 > max=5, row 1 context removed + (7, ARRAY['B']) -- Row 1 context: no match (exceeded max) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_max_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{3,5} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | 2 | 7 + 3 | {A} | 3 | 7 + 4 | {A} | 4 | 7 + 5 | {A} | | + 6 | {A} | | + 7 | {B} | | +(7 rows) + +-- ============================================================ +-- Pathological Pattern Runtime Protection +-- ============================================================ +-- Complex nested nullable ((A* 
B*)*) - Runtime protection +WITH test_complex_nested AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_complex_nested +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A* B*)*) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {B} | 3 | 4 + 4 | {B} | 4 | 4 + 5 | {C} | | +(5 rows) + +-- Nested nullable with quantifier ((A{0,3})*) +WITH test_nested_quantifier AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_quantifier +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A{0,3})*) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {B} | | +(4 rows) + +-- ============================================================ +-- Alternation Runtime Behavior +-- ============================================================ +-- Multi-branch alternation (A (B|C|D|E) F) +WITH test_multi_branch AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['F']), + (4, ARRAY['A']), + (5, ARRAY['C']), + (6, ARRAY['F']), + (7, ARRAY['A']), + (8, ARRAY['D']), + (9, ARRAY['F']), + (10, ARRAY['A']), + (11, ARRAY['E']), + (12, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_multi_branch +WINDOW w AS 
( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A (B | C | D | E) F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {F} | | + 4 | {A} | 4 | 6 + 5 | {C} | | + 6 | {F} | | + 7 | {A} | 7 | 9 + 8 | {D} | | + 9 | {F} | | + 10 | {A} | 10 | 12 + 11 | {E} | | + 12 | {F} | | +(12 rows) + +-- Alternation with quantifiers (A+ | B+ | C+) +WITH test_alt_quantifiers AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['C']), + (8, ARRAY['C']), + (9, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_quantifiers +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ | B+ | C+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {B} | 4 | 5 + 5 | {B} | 5 | 5 + 6 | {C} | 6 | 9 + 7 | {C} | 7 | 9 + 8 | {C} | 8 | 9 + 9 | {C} | 9 | 9 +(9 rows) + +-- altPriority replacement (A B C | D) +-- D branch (higher altPriority) matches first at row 1, +-- then A B C branch (lower altPriority) replaces it at row 3. 
+WITH test_alt_replace AS ( + SELECT * FROM (VALUES + (1, ARRAY['A', 'D']), + (2, ARRAY['B', '_']), + (3, ARRAY['C', '_']), + (4, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_replace +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C | D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,D} | 1 | 3 + 2 | {B,_} | | + 3 | {C,_} | | + 4 | {_,_} | | +(4 rows) + +-- ============================================================ +-- Deep Nested Groups +-- ============================================================ +-- Three-level nesting ((((A B)+)+)+) +WITH test_deep_nesting AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_deep_nesting +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((((A B)+)+)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {A} | 3 | 6 + 4 | {B} | | + 5 | {A} | 5 | 6 + 6 | {B} | | + 7 | {_} | | +(7 rows) + +-- Multiple groups in nesting (((A B) (C D))+) +WITH test_nested_sequential AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['C']), + (8, ARRAY['D']), + (9, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS 
match_end +FROM test_nested_sequential +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A B) (C D))+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 8 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {A} | 5 | 8 + 6 | {B} | | + 7 | {C} | | + 8 | {D} | | + 9 | {_} | | +(9 rows) + +-- Nested END→END max reached +-- Inner group (A B){2} reaches max=2 → exits to outer END +WITH test_end_nested_max AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_end_nested_max +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A B){2})+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 8 + 2 | {B} | | + 3 | {A} | 3 | 6 + 4 | {B} | | + 5 | {A} | 5 | 8 + 6 | {B} | | + 7 | {A} | | + 8 | {B} | | + 9 | {_} | | +(9 rows) + +-- Nested END→END between min/max +-- Inner group (A B){1,3} exits between min/max → outer END count++ +WITH test_end_nested_mid AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_end_nested_mid +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN 
(((A B){1,3})+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 8 + 2 | {B} | | + 3 | {A} | 3 | 8 + 4 | {B} | | + 5 | {A} | 5 | 8 + 6 | {B} | | + 7 | {A} | 7 | 8 + 8 | {B} | | + 9 | {_} | | +(9 rows) + +-- ============================================================ +-- SKIP Options (Runtime) +-- ============================================================ +-- SKIP PAST LAST ROW (non-overlapping matches) +WITH test_skip_past AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_past +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | | + 3 | {A} | | + 4 | {A} | | + 5 | {_} | | +(5 rows) + +-- SKIP TO NEXT ROW (overlapping matches) +WITH test_skip_next AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_next +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {A} | 4 | 4 + 5 | {_} | | +(5 rows) + +-- SKIP difference verification +WITH test_skip_diff AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT 'SKIP 
PAST' AS mode, id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_diff +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +) +UNION ALL +SELECT 'SKIP NEXT' AS mode, id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_diff +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +) +ORDER BY mode, id; + mode | id | flags | match_start | match_end +-----------+----+-------+-------------+----------- + SKIP NEXT | 1 | {A} | 1 | 2 + SKIP NEXT | 2 | {B} | | + SKIP NEXT | 3 | {A} | 3 | 4 + SKIP NEXT | 4 | {B} | | + SKIP PAST | 1 | {A} | 1 | 2 + SKIP PAST | 2 | {B} | | + SKIP PAST | 3 | {A} | 3 | 4 + SKIP PAST | 4 | {B} | | +(8 rows) + +-- ============================================================ +-- INITIAL Mode (Runtime) +-- ============================================================ +-- INITIAL mode (INITIAL placed before AFTER MATCH SKIP - produces syntax error) +WITH test_initial_mode AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Unmatched + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['_']), -- Unmatched + (5, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_initial_mode +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); +ERROR: syntax error at or near "AFTER" +LINE 18: AFTER MATCH SKIP TO NEXT ROW + ^ +-- Default mode (include all rows) +WITH test_default_mode AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Unmatched, but included + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['_']), --
Unmatched, but included + (5, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_default_mode +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {_} | | + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {_} | | + 5 | {A} | 5 | 5 +(5 rows) + +-- Mode difference verification (INITIAL placed before AFTER MATCH SKIP - produces syntax error) +WITH test_mode_diff AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), + (2, ARRAY['A']), + (3, ARRAY['_']) + ) AS t(id, flags) +) +SELECT 'INITIAL' AS mode, COUNT(*) AS row_count +FROM ( + SELECT id FROM test_mode_diff + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A AS 'A' = ANY(flags) + ) +) sub +UNION ALL +SELECT 'DEFAULT' AS mode, COUNT(*) AS row_count +FROM ( + SELECT id FROM test_mode_diff + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A AS 'A' = ANY(flags) + ) +) sub +ORDER BY mode; +ERROR: syntax error at or near "AFTER" +LINE 15: AFTER MATCH SKIP TO NEXT ROW + ^ +-- ============================================================ +-- Frame Boundary Variations +-- ============================================================ +-- Very limited frame (1 FOLLOWING) +WITH test_one_following AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), -- Within 1 FOLLOWING + (3, ARRAY['A']), -- Beyond 1 FOLLOWING from row 1 + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_one_following +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW
+ PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {B} | | + 3 | {A} | 3 | 4 + 4 | {B} | | +(4 rows) + +-- Medium frame (10 FOLLOWING) +WITH test_ten_following AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['A']), + (7, ARRAY['A']), + (8, ARRAY['A']), + (9, ARRAY['A']), + (10, ARRAY['A']), + (11, ARRAY['B']), -- Within 10 FOLLOWING from row 1 + (12, ARRAY['A']), + (13, ARRAY['B']) -- Beyond 10 FOLLOWING from row 1 + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_ten_following +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 10 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 11 + 2 | {A} | 2 | 11 + 3 | {A} | 3 | 11 + 4 | {A} | 4 | 11 + 5 | {A} | 5 | 11 + 6 | {A} | 6 | 11 + 7 | {A} | 7 | 11 + 8 | {A} | 8 | 11 + 9 | {A} | 9 | 11 + 10 | {A} | 10 | 11 + 11 | {B} | | + 12 | {A} | 12 | 13 + 13 | {B} | | +(13 rows) + +-- Exact boundary match +WITH test_exact_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['B']) -- Exactly at 4 FOLLOWING (frame end) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_exact_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {A} | 2 | 5 + 3 | {A} | 3 | 5 + 4 | {A} | 4 | 5 + 5 | 
{B} | | +(5 rows) + +-- ============================================================ +-- Special Partition Cases +-- ============================================================ +-- Empty partition (0 rows) +WITH test_empty_partition AS ( + SELECT * FROM (VALUES + (1, 1, ARRAY['A']), + (2, 2, ARRAY['_']) -- Different partition + ) AS t(id, part, flags) +) +SELECT id, part, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_empty_partition +WHERE part = 99 -- No rows match +WINDOW w AS ( + PARTITION BY part + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE + A AS 'A' = ANY(flags) +); + id | part | flags | match_start | match_end +----+------+-------+-------------+----------- +(0 rows) + +-- Single row partition +WITH test_single_row AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_single_row +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 1 +(1 row) + +-- All rows fail matching (all DEFINE false) +WITH test_all_fail AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_all_fail +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS false -- All rows fail +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {A} | | +(3 rows) + +-- Partition end with absorbable pattern +-- SKIP PAST LAST ROW + unbounded frame + all 
rows match A +-- Triggers absorb in !rowExists path at partition boundary. +WITH test_absorb_partition_end AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorb_partition_end +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {A} | | + 3 | {A} | | + 4 | {A} | | + 5 | {A} | | +(5 rows) + +-- ============================================================ +-- DEFINE Special Cases +-- ============================================================ +-- Undefined variable in DEFINE +WITH test_undefined_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['X']), -- B not defined, defaults to TRUE + (3, ARRAY['C']), + (4, ARRAY['A']), + (5, ARRAY['_']), -- B defaults to TRUE, but no flags + (6, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_undefined_var +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + -- B is undefined, defaults to TRUE + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {X} | | + 3 | {C} | | + 4 | {A} | 4 | 6 + 5 | {_} | | + 6 | {C} | | +(6 rows) + +-- ============================================================ +-- Absorption Dynamic Flags +-- ============================================================ +-- Partial absorbable pattern ((A+) B) +WITH test_partial_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, 
ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_partial_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+) B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | + 5 | {_} | | +(5 rows) + +-- Dynamic flag update ((A+) | B) +WITH test_dynamic_flags AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_dynamic_flags +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+) | B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {B} | 4 | 4 + 5 | {A} | 5 | 5 + 6 | {B} | 6 | 6 +(6 rows) + +-- Non-absorbable context during absorption +-- Pattern (A B)+ C: A,B in absorbable group, C is not. +-- When END exits to C via nfa_state_create, isAbsorbable becomes false. 
+WITH test_non_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']), + (6, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_non_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {B} | | + 3 | {A} | | + 4 | {B} | | + 5 | {C} | | + 6 | {_} | | +(6 rows) + +-- Absorption flags early return (!hasAbsorbableState) +-- Pattern (A B)+ C D with SKIP PAST LAST ROW +-- After reaching C (non-absorbable), hasAbsorbableState becomes false. +-- On next row (D), the early return fires. +WITH test_absorption_early_return AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']), + (6, ARRAY['D']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorption_early_return +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {A} | | + 4 | {B} | | + 5 | {C} | | + 6 | {D} | | + 7 | {_} | | +(7 rows) + +-- Coverage failure: older can't cover newer's states +-- Pattern A+ | B+ with SKIP PAST LAST ROW. +-- Row 1: only A → Ctx1 takes A branch only (B fails). +-- Row 2: A and B → Ctx2 takes both branches. 
+-- Absorption: Ctx1 has A but no B → can't cover Ctx2's B state → fails. +WITH test_coverage_fail AS ( + SELECT * FROM (VALUES + (1, ARRAY['A', '_']), + (2, ARRAY['A', 'B']), + (3, ARRAY['A', '_']), + (4, ARRAY['A', '_']), + (5, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_coverage_fail +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ | B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 4 + 2 | {A,B} | | + 3 | {A,_} | | + 4 | {A,_} | | + 5 | {_,_} | | +(5 rows) + +-- Absorb skips completed context (older->states==NULL) +-- Pattern A+ | B+ with SKIP PAST LAST ROW. +-- Row 1: A only → Ctx1 takes A branch. Row 2: B only → Ctx1 A fails (completed). +-- Ctx2 takes B branch. Absorption: Ctx1 states==NULL → skip. +WITH test_older_completed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_older_completed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ | B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 1 + 2 | {B} | 2 | 3 + 3 | {B} | | + 4 | {_} | | +(4 rows) + +-- Absorb skips non-absorbable context (!hasAbsorbableState) +-- Pattern A+ | B C with SKIP PAST LAST ROW (only A+ branch absorbable). +-- Row 1: B only → Ctx1 takes B branch (non-absorbable), advances to C. +-- Row 2: C,A → Ctx1 C matches (hasAbsorbableState=false). Ctx2 takes A (absorbable). +-- Absorption: Ctx1 !hasAbsorbableState → skip. 
+WITH test_older_non_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['B', '_']), + (2, ARRAY['C', 'A']), + (3, ARRAY['_', 'A']), + (4, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_older_non_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ | B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B,_} | 1 | 2 + 2 | {C,A} | | + 3 | {_,A} | 3 | 3 + 4 | {_,_} | | +(4 rows) + +-- ============================================================ +-- FIXME Issues - Known Limitations +-- ============================================================ +-- FIXME 1 - altPriority lexical order +WITH test_alt_priority_repeated AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), -- Both A and B match + (2, ARRAY['A','B']), + (3, ARRAY['A','B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_priority_repeated +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 3 + 2 | {A,B} | 2 | 3 + 3 | {A,B} | 3 | 3 +(3 rows) + +-- FIXME 1 - Nested ALT lexical order +WITH test_alt_priority_nested AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['C','D']), + (3, ARRAY['A','B']), + (4, ARRAY['C','D']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_priority_nested +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT 
ROW + PATTERN (((A | B) (C | D))+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 4 + 2 | {C,D} | | + 3 | {A,B} | 3 | 4 + 4 | {C,D} | | +(4 rows) + +-- FIXME 2 - Cycle prevention at count > 0 +WITH test_cycle_nonzero AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) -- Inner A* matches 0, cycles at count=3 + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_cycle_nonzero +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A*)*) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {B} | | +(4 rows) + +-- FIXME 2 - Cycle with mixed nullables +WITH test_cycle_mixed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_cycle_mixed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A* B*)*) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {B} | 2 | 2 + 3 | {A} | 3 | 3 + 4 | {C} | | +(4 rows) + diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 549e9b2d7be..d9f879a7624 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -104,6 +104,11 @@ test: publication subscription # ---------- test: select_views portals_p2 foreign_key cluster dependency guc 
bitmapops combocid tsearch tsdicts foreign_data window xmlmap functional_deps advisory_lock indirect_toast equivclass stats_rewrite +# ---------- +# Row Pattern Recognition tests +# ---------- +test: rpr rpr_base rpr_explain rpr_nfa + # ---------- # Another group of parallel tests (JSON related) # ---------- diff --git a/src/test/regress/sql/rpr.sql b/src/test/regress/sql/rpr.sql new file mode 100644 index 00000000000..7690c5611c0 --- /dev/null +++ b/src/test/regress/sql/rpr.sql @@ -0,0 +1,2180 @@ +-- +-- Test for row pattern definition clause +-- + +CREATE TEMP TABLE stock ( + company TEXT, + tdate DATE, + price INTEGER +); +INSERT INTO stock VALUES ('company1', '2023-07-01', 100); +INSERT INTO stock VALUES ('company1', '2023-07-02', 200); +INSERT INTO stock VALUES ('company1', '2023-07-03', 150); +INSERT INTO stock VALUES ('company1', '2023-07-04', 140); +INSERT INTO stock VALUES ('company1', '2023-07-05', 150); +INSERT INTO stock VALUES ('company1', '2023-07-06', 90); +INSERT INTO stock VALUES ('company1', '2023-07-07', 110); +INSERT INTO stock VALUES ('company1', '2023-07-08', 130); +INSERT INTO stock VALUES ('company1', '2023-07-09', 120); +INSERT INTO stock VALUES ('company1', '2023-07-10', 130); +INSERT INTO stock VALUES ('company2', '2023-07-01', 50); +INSERT INTO stock VALUES ('company2', '2023-07-02', 2000); +INSERT INTO stock VALUES ('company2', '2023-07-03', 1500); +INSERT INTO stock VALUES ('company2', '2023-07-04', 1400); +INSERT INTO stock VALUES ('company2', '2023-07-05', 1500); +INSERT INTO stock VALUES ('company2', '2023-07-06', 60); +INSERT INTO stock VALUES ('company2', '2023-07-07', 1100); +INSERT INTO stock VALUES ('company2', '2023-07-08', 1300); +INSERT INTO stock VALUES ('company2', '2023-07-09', 1200); +INSERT INTO stock VALUES ('company2', '2023-07-10', 1300); + +SELECT * FROM stock; + +-- basic test using PREV +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + 
FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- basic test using PREV. UP appears twice +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+ UP+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- basic test using PREV. Use '*' +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP* DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- basic test using PREV. Use '?' +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP? 
DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- test using alternation (|) with sequence +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START (UP | DOWN)) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- test using alternation (|) with group quantifier +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START (UP | DOWN)+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- test using nested alternation +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START ((UP DOWN) | FLAT)+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price), + FLAT AS price = PREV(price) +); + +-- test using group with quantifier +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((UP DOWN)+) + DEFINE + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- test using absolute threshold values (not relative PREV) +-- HIGH: price > 150, LOW: price < 100, MID: neutral range +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOW MID* HIGH) + DEFINE + LOW AS price < 100, + MID AS price >= 100 AND price <= 150, + HIGH AS price > 150 +); + +-- 
test threshold-based pattern with alternation +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOW (MID | HIGH)+) + DEFINE + LOW AS price < 100, + MID AS price >= 100 AND price <= 150, + HIGH AS price > 150 +); + +-- basic test with none-greedy pattern +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A A) + DEFINE + A AS price >= 140 AND price <= 150 +); + +-- test using {n} quantifier (A A A should be optimized to A{3}) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{3}) + DEFINE + A AS price >= 140 AND price <= 150 +); + +-- test using {n,} quantifier (2 or more) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,}) + DEFINE + A AS price > 100 +); + +-- test using {n,m} quantifier (2 to 4) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,4}) + DEFINE + A AS price > 100 +); + +-- last_value() should remain consistent +SELECT company, tdate, price, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- omit "START" in DEFINE but it is ok because "START AS TRUE" is +-- implicitly defined. per spec. 
+SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- the first row start with less than or equal to 100 +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOWPRICE UP+ DOWN+) + DEFINE + LOWPRICE AS price <= 100, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- second row raises 120% +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOWPRICE UP+ DOWN+) + DEFINE + LOWPRICE AS price <= 100, + UP AS price > PREV(price) * 1.2, + DOWN AS price < PREV(price) +); + +-- using NEXT +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UPDOWN) + DEFINE + START AS TRUE, + UPDOWN AS price > PREV(price) AND price > NEXT(price) +); + +-- using AFTER MATCH SKIP TO NEXT ROW +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (START UPDOWN) + DEFINE + START AS TRUE, + UPDOWN AS price > PREV(price) AND price > NEXT(price) +); + +-- match everything + +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND 
UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+) + DEFINE + A AS TRUE +); + +-- nth_value beyond reduced frame (no IGNORE NULLS) +-- Tests WinGetSlotInFrame/WinGetFuncArgInFrame out-of-frame with RPR +SELECT company, tdate, price, + nth_value(price, 5) OVER w AS nth_5 +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- backtracking with reclassification of rows +-- using AFTER MATCH SKIP PAST LAST ROW +SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+ B+) + DEFINE + A AS price > 100, + B AS price > 100 +); + +-- backtracking with reclassification of rows +-- using AFTER MATCH SKIP TO NEXT ROW +SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (A+ B+) + DEFINE + A AS price > 100, + B AS price > 100 +); + +-- SKIP TO NEXT ROW with limited frame (Ishii-san's test case) +-- Each row should produce its own match within its frame +WITH data AS ( + SELECT * FROM (VALUES + ('A', 1), ('A', 2), + ('B', 3), ('B', 4) + ) AS t(gid, id) +) +SELECT gid, id, array_agg(id) OVER w +FROM data +WINDOW w AS ( + PARTITION BY gid + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE A AS id < 10 +); + +-- Limited frame with absorption test +-- Row 0: frame [0,2], can't see B at row 3 -> no match +-- Row 1: frame [1,3], can see A A B -> should match rows 1-3 +WITH frame_absorb_test AS ( + SELECT 
* FROM (VALUES + (0, 'A'), (1, 'A'), (2, 'A'), (3, 'B') + ) AS t(id, flag) +) +SELECT id, flag, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM frame_absorb_test +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS flag = 'A', + B AS flag = 'B' +); + +-- ROWS BETWEEN CURRENT ROW AND offset FOLLOWING +SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w, + count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- +-- Aggregates +-- + +-- using AFTER MATCH SKIP PAST LAST ROW +SELECT company, tdate, price, + first_value(price) OVER w, + last_value(price) OVER w, + max(price) OVER w, + min(price) OVER w, + sum(price) OVER w, + avg(price) OVER w, + count(price) OVER w +FROM stock +WINDOW w AS ( +PARTITION BY company +ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING +AFTER MATCH SKIP PAST LAST ROW +INITIAL +PATTERN (START UP+ DOWN+) +DEFINE +START AS TRUE, +UP AS price > PREV(price), +DOWN AS price < PREV(price) +); + +-- using AFTER MATCH SKIP TO NEXT ROW +SELECT company, tdate, price, + first_value(price) OVER w, + last_value(price) OVER w, + max(price) OVER w, + min(price) OVER w, + sum(price) OVER w, + avg(price) OVER w, + count(price) OVER w +FROM stock +WINDOW w AS ( +PARTITION BY company +ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING +AFTER MATCH SKIP TO NEXT ROW +INITIAL +PATTERN (START UP+ DOWN+) +DEFINE +START AS TRUE, +UP AS price > PREV(price), +DOWN AS price < PREV(price) +); + +-- JOIN case +CREATE TEMP TABLE t1 (i int, v1 int); +CREATE TEMP TABLE t2 (j int, v2 int); +INSERT INTO t1 VALUES(1,10); +INSERT INTO t1 VALUES(1,11); +INSERT INTO t1 VALUES(1,12); +INSERT INTO t2 VALUES(2,10); 
+INSERT INTO t2 VALUES(2,11); +INSERT INTO t2 VALUES(2,12); + +SELECT * FROM t1, t2 WHERE t1.v1 <= 11 AND t2.v2 <= 11; + +SELECT *, count(*) OVER w FROM t1, t2 +WINDOW w AS ( + PARTITION BY t1.i + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A) + DEFINE + A AS v1 <= 11 AND v2 <= 11 +); + +-- WITH case +WITH wstock AS ( + SELECT * FROM stock WHERE tdate < '2023-07-08' +) +SELECT tdate, price, +first_value(tdate) OVER w, +count(*) OVER w + FROM wstock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- ReScan test: LATERAL join forces WindowAgg rescan with RPR +-- Tests ExecReScanWindowAgg clearing prev_slot/next_slot +SELECT g.x, sub.* +FROM generate_series(1, 2) g(x), +LATERAL ( + SELECT id, price, count(*) OVER w AS c + FROM (VALUES (1, 100), (2, 200), (3, 150)) AS t(id, price) + WHERE id <= g.x + 1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (START UP+) + DEFINE + START AS TRUE, + UP AS price > PREV(price) + ) +) sub +ORDER BY g.x, sub.id; + +-- PREV has multiple column reference +CREATE TEMP TABLE rpr1 (id INTEGER, i SERIAL, j INTEGER); +INSERT INTO rpr1(id, j) SELECT 1, g*2 FROM generate_series(1, 10) AS g; +SELECT id, i, j, count(*) OVER w + FROM rpr1 + WINDOW w AS ( + PARTITION BY id + ORDER BY i + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (START COND+) + DEFINE + START AS TRUE, + COND AS PREV(i + j + 1) < 10 +); + +-- Smoke test for larger partitions. +WITH s AS ( + SELECT v, count(*) OVER w AS c + FROM (SELECT generate_series(1, 5000) v) + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ( r+ ) + DEFINE r AS TRUE + ) +) +-- Should be exactly one long match across all rows. 
+SELECT * FROM s WHERE c > 0; + +WITH s AS ( + SELECT v, count(*) OVER w AS c + FROM (SELECT generate_series(1, 5000) v) + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ( r ) + DEFINE r AS TRUE + ) +) +-- Every row should be its own match. +SELECT count(*) FROM s WHERE c > 0; + +-- Large partition test: 100K rows with A+ B* C{10000,} pattern +-- Tests that int32 count doesn't overflow with large repetitions +WITH data AS ( + SELECT generate_series(0, 100000) AS v +), +result AS ( + SELECT v, + count(*) OVER w AS match_len, + first_value(v) OVER w AS match_first, + last_value(v) OVER w AS match_last + FROM data + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+ B* C{10000,}) + DEFINE + A AS v < 33333, + B AS v >= 33333 AND v < 66666, + C AS v >= 66666 AND v < 99999 + ) +) +-- Should match: A (33333 rows) + B (33333 rows) + C (33333 rows) = 99999 rows +SELECT match_first, match_last, match_len FROM result WHERE match_len > 0; + +-- +-- Using IGNORE NULLS +-- +-- no NULL rows case. The result should be identical with "basic test using PREV" +SELECT company, tdate, price, first_value(price) IGNORE NULLS OVER w, + last_value(price) IGNORE NULLS OVER w, + nth_value(tdate, 2) IGNORE NULLS OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- nth_value with IGNORE NULLS option wants to find the second row but +-- due a NULL in the midlle, it returns the third row. 
+WITH data AS ( + SELECT * FROM (VALUES + (10, 1), (11, NULL), (12, 3), (13, 4) + ) AS t(gid, id)) + SELECT gid, id, nth_value(id, 2) IGNORE NULLS OVER w AS second_val, + array_agg(id) OVER w + FROM data + WINDOW w AS ( + ORDER BY gid + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS gid < 13 + ); + +-- nth_value with IGNORE NULLS option wants to find the third row but +-- due a NULL in the midlle, it reaches the end of reduced frame and +-- return NULL +WITH data AS ( + SELECT * FROM (VALUES + (10, 1), (11, NULL), (12, 3), (13, 4) + ) AS t(gid, id)) + SELECT gid, id, nth_value(id, 3) IGNORE NULLS OVER w AS thrid_val, + array_agg(id) OVER w + FROM data + WINDOW w AS ( + ORDER BY gid + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS gid < 13 + ); + +-- nth_value beyond reduced frame with IGNORE NULLS +-- Tests ignorenulls_getfuncarginframe early out-of-frame check +SELECT company, tdate, price, + nth_value(price, 5) IGNORE NULLS OVER w AS nth_5_in +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- View and pg_get_viewdef tests. 
+CREATE TEMP VIEW v_window AS +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +SELECT * FROM v_window; +SELECT pg_get_viewdef('v_window'); + +-- +-- Pattern optimization tests +-- VIEW shows original pattern, EXPLAIN shows optimized pattern +-- + +-- Test: duplicate alternatives removal (A | B | A)+ -> (A | B)+ +CREATE TEMP VIEW v_opt_dup AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | B | A)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_dup'); -- original: ((a | b | a)+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_dup; -- optimized: ((a | b)+) + +-- Test: duplicate group removal ((A | B)+ | (A | B)+) -> (A | B)+ +CREATE TEMP VIEW v_opt_dup_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | B)+ | (A | B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_dup_group'); -- original: ((a | b)+ | (a | b)+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_dup_group; -- optimized: ((a | b)+) + +-- Test: consecutive vars merge (A A A) -> A{3} +CREATE TEMP VIEW v_opt_merge AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A A) + DEFINE + A AS price >= 140 AND price <= 150 +); +SELECT pg_get_viewdef('v_opt_merge'); -- original: (a a a) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge; -- optimized: a{3} + +-- Test: quantified 
vars merge (A A+ A) -> A{3,} +CREATE TEMP VIEW v_opt_merge_quant AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A+ A) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_merge_quant'); -- original: (a a+ a) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_quant; -- optimized: a{3,} + +-- Test: merge two unbounded (A+ A+) -> A{2,} +CREATE TEMP VIEW v_opt_merge_unbounded AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A+ A+) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_merge_unbounded'); -- original: (a+ a+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_unbounded; -- optimized: a{2,} + +-- Test: merge with zero-min (A* A+) -> A+ +CREATE TEMP VIEW v_opt_merge_star AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A* A+) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_merge_star'); -- original: (a* a+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_star; -- optimized: a+ + +-- Test: complex merge (A A{2} A+ A{3}) -> A{7,} +CREATE TEMP VIEW v_opt_merge_complex AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A{2} A+ A{3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_merge_complex'); -- original: (a a{2} a+ a{3}) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_complex; -- optimized: a{7,} + +-- Test: group merge ((A B) (A B)+) -> (A B){2,} +CREATE TEMP VIEW v_opt_merge_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED 
FOLLOWING + INITIAL + PATTERN ((A B) (A B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_group'); -- original: ((a b) (a b)+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group; -- expected: (a b){2,} + +-- Test: group merge A B (A B)+ -> (A B){2,} +CREATE TEMP VIEW v_opt_merge_group2 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A B (A B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_group2'); -- original: (a b (a b)+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group2; -- expected: (a b){2,} + +-- Test: group merge (A B) (A B)+ (A B) -> (A B){3,} +CREATE TEMP VIEW v_opt_merge_group3 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A B) (A B)+ (A B)) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_group3'); -- original: ((a b) (a b)+ (a b)) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group3; -- expected: (a b){3,} + +-- Test: group merge A B A B (A B)+ A B A B -> (A B){5,} +CREATE TEMP VIEW v_opt_merge_group4 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A B A B (A B)+ A B A B) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_group4'); -- original: (a b a b (a b)+ a b a b) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group4; -- expected: (a b){5,} + +-- Test: group merge C A B (A B)+ A B C -> C (A B){3,} C +CREATE TEMP VIEW v_opt_merge_group5 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN 
(C A B (A B)+ A B C) + DEFINE + A AS price > 100, + B AS price <= 100, + C AS price > 200 +); +SELECT pg_get_viewdef('v_opt_merge_group5'); -- original: (c a b (a b)+ a b c) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group5; -- expected: c (a b){3,} c + +-- Test: consecutive GROUP merge (A B)+ (A B)+ -> (A B){2,} +CREATE TEMP VIEW v_opt_merge_consec_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A B)+ (A B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_consec_group'); -- original: ((a b)+ (a b)+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_consec_group; -- expected: (a b){2,} + +-- Test: consecutive GROUP merge with different quantifiers (A B){2} (A B){3} -> (A B){5} +CREATE TEMP VIEW v_opt_merge_consec_group2 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A B){2} (A B){3}) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_consec_group2'); -- original: ((a b){2} (a b){3}) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_consec_group2; -- expected: (a b){5} + +-- Test {n} quantifier display +CREATE TEMP VIEW v_quantifier_n AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_quantifier_n'); + +-- Test {n,} quantifier display +CREATE TEMP VIEW v_quantifier_n_plus AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_quantifier_n_plus'); + +-- Test: flatten 
nested SEQ (A (B C)) -> A B C +CREATE TEMP VIEW v_opt_flatten_seq AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A (B C)) + DEFINE + A AS price > 100, + B AS price > 150, + C AS price < 150 +); +SELECT pg_get_viewdef('v_opt_flatten_seq'); -- original: (a (b c)) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_flatten_seq; -- optimized: a b c + +-- Test: flatten nested ALT (A | (B | C)) -> (A | B | C) +CREATE TEMP VIEW v_opt_flatten_alt AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | (B | C))+) + DEFINE + A AS price > 200, + B AS price > 100, + C AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_flatten_alt'); -- original: ((a | (b | c))+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_flatten_alt; -- optimized: ((a | b | c))+ + +-- Test: unwrap GROUP{1,1} ((A)) -> A +CREATE TEMP VIEW v_opt_unwrap_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (((A))) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_unwrap_group'); -- original: (((a))) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_unwrap_group; -- optimized: a + +-- Test: quantifier multiplication (A{2}){3} -> A{6} +CREATE TEMP VIEW v_opt_quant_mult AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A{2}){3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_quant_mult'); -- original: ((a{2}){3}) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult; -- optimized: a{6} + +-- Test: quantifier multiplication (A{2,4}){3} -> A{6,12} +CREATE TEMP VIEW v_opt_quant_mult_range AS +SELECT company, tdate, price, 
count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A{2,4}){3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_quant_mult_range'); -- original: ((a{2,4}){3}) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult_range; -- optimized: a{6,12} + +-- Test: quantifier multiplication blocked (A{2}){3,5} -> no change +-- outer range with child exact > 1 causes gaps (6,8,10 not 6,7,8,9,10) +CREATE TEMP VIEW v_opt_quant_mult_range2 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A{2}){3,5}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_quant_mult_range2'); -- original: ((a{2}){3,5}) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult_range2; -- NOT optimized: (a{2}){3,5} + +-- Test: quantifier multiplication blocked by INF (A+){3} -> no change +CREATE TEMP VIEW v_opt_quant_mult_inf AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A+){3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_quant_mult_inf'); -- original: ((a+){3}) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult_inf; -- no multiply: (a+){3} + +-- Test: unwrap single-item ALT after duplicate removal (A | A) -> A +CREATE TEMP VIEW v_opt_unwrap_alt AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | A)+) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_unwrap_alt'); -- original: ((a | a)+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_unwrap_alt; -- optimized: a+ + +-- Test: GROUP{1,1} to SEQ with flatten ((A B)(C D)) -> A B C D +CREATE TEMP VIEW v_opt_group_to_seq AS +SELECT company, tdate, price, 
count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (((A B)(C D))) + DEFINE + A AS price > 200, + B AS price > 150, + C AS price > 100, + D AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_group_to_seq'); -- original: (((a b)(c d))) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_group_to_seq; -- optimized: a b c d + +-- Test: combined consecutive GROUP + prefix merge A B (A B)+ (A B)+ -> (A B){3,} +CREATE TEMP VIEW v_opt_combined_merge AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A B (A B)+ (A B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_combined_merge'); -- original: (a b (a b)+ (a b)+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_combined_merge; -- expected: (a b){3,} + +-- Test: nested ALT pattern - bug reproduction +CREATE TEMP VIEW v_opt_nested_alt AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (((A B) | C) D | A B C) + DEFINE + A AS price <= 100, + B AS price <= 150, + C AS price <= 200, + D AS price > 200 +); +SELECT pg_get_viewdef('v_opt_nested_alt'); +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_nested_alt; + +-- Test: nested ALT with unbounded - A+ inside +CREATE TEMP VIEW v_opt_nested_alt2 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (((A+ B) | C) D | A B C) + DEFINE + A AS price <= 100, + B AS price <= 150, + C AS price <= 200, + D AS price > 200 +); +SELECT pg_get_viewdef('v_opt_nested_alt2'); +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_nested_alt2; + +-- +-- Error cases +-- + +-- row pattern definition variable name must not appear more than once +SELECT company, 
tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price), + UP AS price > PREV(price) +); + +-- subqueries in DEFINE clause are not supported +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START LOWPRICE) + DEFINE + START AS TRUE, + LOWPRICE AS price < (SELECT 100) +); + +-- aggregates in DEFINE clause are not supported +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START LOWPRICE) + DEFINE + START AS TRUE, + LOWPRICE AS price < count(*) +); + +-- FRAME must start at current row when row pattern recognition is used +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- EXCLUDE is not permitted +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + EXCLUDE CURRENT ROW + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- SEEK is not supported +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION 
BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + SEEK + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- PREV's argument must have at least 1 column reference +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(1), + DOWN AS price < PREV(1) +); + +-- Unsupported quantifier +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (START UP~ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(1), + DOWN AS price < PREV(1) +); + +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (START UP+? 
DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(1), + DOWN AS price < PREV(1) +); + +-- Maximum pattern variables is 251 (RPR_VARID_MAX) + +-- Error: 252 variables exceeds limit of 251 +DO $$ +DECLARE + pattern_vars text; + define_vars text; + query text; +BEGIN + SELECT string_agg('v' || lpad(i::text, 3, '0'), ' '), + string_agg('v' || lpad(i::text, 3, '0') || ' AS TRUE', ', ') + INTO pattern_vars, define_vars + FROM generate_series(1, 252) i; + + query := format('SELECT * FROM (SELECT 1 AS x) t WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (%s) + DEFINE %s)', pattern_vars, define_vars); + + EXECUTE query; +END; +$$; + +-- Error: 253 variables exceeds limit of 251 +DO $$ +DECLARE + pattern_vars text; + define_vars text; + query text; +BEGIN + SELECT string_agg('v' || lpad(i::text, 3, '0'), ' '), + string_agg('v' || lpad(i::text, 3, '0') || ' AS TRUE', ', ') + INTO pattern_vars, define_vars + FROM generate_series(1, 253) i; + + query := format('SELECT * FROM (SELECT 1 AS x) t WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (%s) + DEFINE %s)', pattern_vars, define_vars); + + EXECUTE query; +END; +$$; + + CREATE TEMP TABLE stock_null (company TEXT, tdate DATE, price INTEGER); + INSERT INTO stock_null VALUES ('c1', '2023-07-01', 100); + INSERT INTO stock_null VALUES ('c1', '2023-07-02', NULL); -- NULL in middle + INSERT INTO stock_null VALUES ('c1', '2023-07-03', 200); + INSERT INTO stock_null VALUES ('c1', '2023-07-04', 150); + + SELECT company, tdate, price, count(*) OVER w AS match_count + FROM stock_null + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (START UP DOWN) + DEFINE START AS TRUE, UP AS price > PREV(price), DOWN AS price < +PREV(price) + ); + + +-- Overlapping match tests (requires multi-context for correct behavior) +-- Using array flags: 'X' = ANY(flags) for multi-TRUE support + +-- Test 1: A B C D E | B C D | C D E F - 
three overlapping patterns +-- Different end points: B C D (4), A B C D E (5), C D E F (6) +WITH test_overlap1 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E | B C D | C D E F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + +WITH test_overlap1 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D E | B C D | C D E F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); +-- PAST LAST: only one match +-- TO NEXT ROW with multi-context: three matches +-- Row 1: A B C D E (1-5) +-- Row 2: B C D (2-4) <- ends first! +-- Row 3: C D E F (3-6) <- ends last! 
+ +-- Test 1b: Longer pattern FAILS, shorter pattern should survive +-- Pattern: A+ B C D E | B+ C +-- A+ B C D E fails (no E found in sequence; row 5 is X) +-- B+ C matches at rows 2-3 +-- Expected result: match 2-3 (B+ C); row 1 carries only A, so it cannot start B+ C +WITH test_overlap1b AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1b +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C D E | B+ C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + +-- Test 2: A B+ C | B+ D - long B sequence with different endings +WITH test_overlap2 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['B']), + (8, ARRAY['B']), + (9, ARRAY['B']), + (10, ARRAY['D']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap2 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+ C | B+ D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); +-- Current result (correct): +-- Row 1: A B+ C (1-6) +-- Rows 7-9: B+ D (7-10, 8-10, 9-10) +-- Note: Rows 2-6 cannot match B+ D because Row 6 is C, not D +-- With absorption: 8-10 and 9-10 would be absorbed by 7-10 (earlier context covers later) + +-- Test 3: Greedy quantifier with late failure - A B C+ D | A B +-- Pattern expects D after C+, but row 6 carries E, which no DEFINE tests for ("betrayal") +WITH test_betrayal AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['C']), + (5, ARRAY['C']), + (6, ARRAY['E']) + )
AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_betrayal +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C+ D | A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); +-- A B C+ D fails at Row 6 (E instead of D) +-- Open question (TODO confirm): does the match fall back to the second alternative A B (1-2)? + +-- Test 4: Lexical Order test - A B C | A B C D E +-- SQL standard: first matching alternative wins +WITH test_lexical AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_lexical +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C | A B C D E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); +-- SQL standard Lexical Order: A B C (1-3) wins (first alternative) + +-- Test 4b: Reversed pattern order - A B C D E | A B C +WITH test_lexical2 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_lexical2 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E | A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); +-- SQL standard Lexical Order: A B C D E (1-5) wins (first alternative) + +-- Test 5: Multiple TRUE in single row (overlapping pattern variables) +-- Each row matches multiple
DEFINE conditions simultaneously +WITH test_multi_true AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), -- A and B both TRUE + (2, ARRAY['B','C']), -- B and C both TRUE + (3, ARRAY['C','D']), -- C and D both TRUE + (4, ARRAY['D','E']), -- D and E both TRUE + (5, ARRAY['E','_']) -- E only ('_' is filler; no DEFINE tests for it) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_multi_true +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); +-- Row 1: A=T, B=T -> matches A +-- Row 2: B=T, C=T -> matches B +-- Row 3: C=T, D=T -> matches C +-- Row 4: D=T, E=T -> matches D +-- Row 5: E=T -> matches E +-- Expected result: match 1-5 (A B C D E) + +-- Test 6: Diagonal pattern with multi-TRUE (shifted overlap: each letter reappears one row later in the second array slot) +WITH test_diagonal AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['B','A']), + (3, ARRAY['C','B']), + (4, ARRAY['D','C']), + (5, ARRAY['_','D']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_diagonal +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); +-- Possible matches: +-- Start Row 1: A(1) B(2) C(3) D(4) -> 1-4 +-- Start Row 2: A(2) B(3) C(4) D(5) -> 2-5 (because Row 2 has A too!)
+ +-- =================================================================== +-- Context Absorption Tests +-- =================================================================== + +-- Test absorption 1: Basic A+ pattern - later contexts absorbed by earlier +WITH test_absorb_basic AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_basic +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE A AS 'A' = ANY(flags) +); +-- Pattern A+ is absorbable (unbounded first element, only one unbounded) +-- Expected 4 matches, one per A row: (1-4, 2-4, 3-4, 4-4) + +-- Test absorption 2: A+ B pattern - absorption with fixed suffix +WITH test_absorb_suffix AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_suffix +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); +-- Pattern A+ B is absorbable (A+ unbounded first, B bounded suffix) +-- All potential matches end at the same row (row 4 with B) +-- 3 matches: (1-4, 2-4, 3-4) + +-- Test absorption 3: Per-branch absorption with ALT (B+ C | B+ D) +WITH test_absorb_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['D']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (B+ C
| B+ D) + DEFINE + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); +-- Both branches B+ C and B+ D are absorbable (B+ unbounded first) +-- Only the B+ D branch can complete (row 4 is D, no row carries C): 3 matches (1-4, 2-4, 3-4) + +-- Test absorption 4: Non-absorbable pattern (A B+ - unbounded not first) +WITH test_no_absorb AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_no_absorb +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); +-- Pattern A B+ is NOT absorbable (A bounded first, B+ unbounded but not first) +-- Only Row 1 can start a match (only row with A), so only one match: 1-4 + +-- Test absorption 5: GROUP merge enables absorption +WITH test_absorb_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B) (A B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); +-- Pattern optimized: (A B) (A B)+ -> (A B){2,} +-- 2 matches: 1-6 (3 reps), 3-6 (2 reps); from row 5 only one (A B) remains, below the minimum of 2 + +-- Test absorption 6: Multiple unbounded - first element unbounded enables absorption +WITH test_multi_unbounded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_multi_unbounded +WINDOW w AS ( + ORDER BY id +
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); +-- 2 matches: 1-4, 2-4 (same endpoint 4) + +-- ============================================ +-- Jacob's RPR Patterns (from jacob branch) +-- ============================================ + +-- Test: A? (optional, greedy) +WITH jacob_optional AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_optional +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A?) + DEFINE A AS 'A' = ANY(flags) +); +-- Expected: 1-1 (matches A) + +-- Test: A{2} (exact count) +WITH jacob_exact AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_exact +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2}) + DEFINE A AS 'A' = ANY(flags) +); +-- Expected: 1-2 (row 3 is the only A left, too few for A{2}) + +-- Test: A{1,3} (bounded range, greedy) +WITH jacob_bounded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_bounded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{1,3}) + DEFINE A AS 'A' = ANY(flags) +); +-- Expected: 1-3 (greedy takes max), then 4-4 + +-- Test: A | B (simple alternation) +WITH jacob_simple_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags,
first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_simple_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A | B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); +-- Expected: 1-1 (A), 2-2 (B) + +-- Test: A | B | C (three-way alternation) +WITH jacob_three_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_three_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A | B | C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); +-- Expected: 1-1 (second alternative B; row 1 carries only B) + +-- Test: A B C (concatenation) +WITH jacob_concat AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_concat +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); +-- Expected: 1-3 + +-- Test: A B? C (optional middle) +WITH jacob_optional_mid AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), + (3, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_optional_mid +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B?
C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); +-- Expected: 1-2 (A C, B skipped) + +-- Test: (A B){2} (nested group with quantifier) +WITH jacob_nested_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_nested_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B){2}) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); +-- Expected: 1-4 (A B A B) + +-- Test: (A){3} (quantifier on grouped single element) +WITH jacob_group_quant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_group_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A){3}) + DEFINE A AS 'A' = ANY(flags) +); +-- Expected: 1-3 + +-- Test: A B C | A B C D E (lexical order - first alt wins) +WITH jacob_lex_first AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_lex_first +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C | A B C D E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); +-- Expected: 1-3 (A B C wins by lexical order) + +-- Test: A B C D E | A B C (lexical order - the longer alternative is listed first, so it wins) +WITH jacob_lex_long AS ( + SELECT * FROM (VALUES + (1,
ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_lex_long +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E | A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); +-- Expected: 1-5 (A B C D E wins by lexical order) + +-- ============================================ +-- Alternation with quantifiers (BUG cases from Jacob's tests) +-- ============================================ + +-- Test: (A | B)+ C - alternation inside quantified group followed by C +WITH jacob_alt_quant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_alt_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); +-- Expected: 1-4 (A B A C) + +-- Test: ((A | B) C)+ - alternation inside group with outer quantifier +WITH jacob_alt_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), + (3, ARRAY['B']), + (4, ARRAY['C']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_alt_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A | B) C)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); +-- Expected: 1-4 (A C B C, i.e. two iterations of the group) + +-- ============================================ +-- RELUCTANT quantifiers
(not yet supported) +-- ============================================ + +-- Test: A+? B (reluctant) - parser rejects with ERROR +WITH jacob_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); +-- Expected: ERROR (reluctant quantifiers not yet supported) + +-- Test: A{1,3}? B (reluctant bounded) - parser rejects with ERROR +WITH jacob_reluctant_bounded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM jacob_reluctant_bounded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{1,3}? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); +-- Expected: ERROR (reluctant quantifiers not yet supported) + +-- ============================================ +-- Nested quantifiers (pathological patterns) +-- ============================================ +-- These patterns previously caused a segfault or an infinite loop. +-- Now they are either optimized at compile time or handled safely at runtime.
+ +-- Test: (A*)* - nested unbounded quantifiers (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A*)*) + DEFINE A AS TRUE +); + +-- Test: (A*)+ - inner nullable, outer requires one (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A*)+) + DEFINE A AS TRUE +); + +-- Test: (A+)* - outer nullable (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A+)*) + DEFINE A AS TRUE +); + +-- Test: (A+)+ - both require match (optimized to A+) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A+)+) + DEFINE A AS TRUE +); + +-- Test: (A* B*)* - complex nested pattern (runtime protection) +-- Not optimized but handled safely by empty-match loop prevention +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A* B*)*) + DEFINE A AS TRUE, B AS TRUE +); + +-- Test: (((A)*)*)* - triple nested (optimized through recursive optimization) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 3) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((((A)*)*)*) + DEFINE A AS TRUE +); diff --git a/src/test/regress/sql/rpr_base.sql b/src/test/regress/sql/rpr_base.sql new file mode 100644 index 00000000000..879f5fe48a2 --- /dev/null +++ 
b/src/test/regress/sql/rpr_base.sql @@ -0,0 +1,3658 @@ +-- ============================================================ +-- RPR Base Tests +-- Tests for Row Pattern Recognition (ISO/IEC 19075-5:2016) +-- ============================================================ +-- +-- Parser Layer: +-- Keyword Usage Tests +-- DEFINE Clause Tests +-- FRAME Options Tests +-- PARTITION BY + FRAME Tests +-- PATTERN Syntax Tests +-- Quantifiers Tests +-- Navigation Functions Tests +-- SKIP TO / INITIAL Tests +-- Serialization/Deserialization Tests +-- Error Cases Tests +-- +-- Planner Layer: +-- Pattern Optimization Tests +-- Absorption Flag Display Tests +-- Absorption Analysis Tests +-- Edge Case Tests +-- Optimization Fallback Tests +-- Planner Integration Tests +-- Subquery and CTE Tests +-- JOIN Tests +-- Complex Expression Tests +-- Set Operations Tests +-- Sorting and Grouping Tests +-- Stress Tests +-- Error Limit Tests +-- +-- Contributed Tests: +-- Jacob's Patterns +-- Pathological Patterns +-- ============================================================ + +SET client_min_messages = WARNING; + +-- ============================================================ +-- Keyword Usage Tests +-- ============================================================ + +-- RPR keywords as column names +-- Keywords: define, initial, past, pattern, seek + +CREATE TABLE rpr_keywords ( + id INT, + define INT, -- DEFINE keyword + initial INT, -- INITIAL keyword + past INT, -- PAST keyword + pattern INT, -- PATTERN keyword + seek INT, -- SEEK keyword + skip INT -- SKIP keyword (pre-existing) +); + +INSERT INTO rpr_keywords VALUES (1, 10, 20, 30, 40, 50, 60); + +SELECT id, define, initial, past, pattern, seek, skip +FROM rpr_keywords +ORDER BY id; + +DROP TABLE rpr_keywords; + +-- ============================================================ +-- DEFINE Clause Tests +-- ============================================================ + + +-- Simple column references +CREATE TABLE stock_price ( + dt DATE, + 
symbol TEXT, + price NUMERIC, + volume INT +); + +INSERT INTO stock_price VALUES + ('2024-01-01', 'AAPL', 150, 1000), + ('2024-01-02', 'AAPL', 155, 1200), + ('2024-01-03', 'AAPL', 152, 900), + ('2024-01-04', 'AAPL', 160, 1500), + ('2024-01-05', 'AAPL', 158, 1100); + +-- Simple column reference +SELECT dt, price, COUNT(*) OVER w as cnt +FROM stock_price +WINDOW w AS ( + PARTITION BY symbol + ORDER BY dt + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (UP+) + DEFINE UP AS price > 150 +) +ORDER BY dt; + +-- Multiple column references +SELECT dt, price, volume, COUNT(*) OVER w as cnt +FROM stock_price +WINDOW w AS ( + PARTITION BY symbol + ORDER BY dt + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (GOOD+) + DEFINE GOOD AS price > 150 AND volume > 1000 +) +ORDER BY dt; + +-- Expression in DEFINE +SELECT dt, price, COUNT(*) OVER w as cnt +FROM stock_price +WINDOW w AS ( + PARTITION BY symbol + ORDER BY dt + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (HIGH+) + DEFINE HIGH AS price * 1.1 > 165 +) +ORDER BY dt; + +-- Arithmetic on multiple columns +SELECT dt, price, volume, COUNT(*) OVER w as cnt +FROM stock_price +WINDOW w AS ( + PARTITION BY symbol + ORDER BY dt + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (CALC+) + DEFINE CALC AS (price + volume / 100) > 160 +) +ORDER BY dt; + +DROP TABLE stock_price; + +-- Auto-generated DEFINE +CREATE TABLE rpr_auto (id INT, val INT); +INSERT INTO rpr_auto VALUES (1, 10), (2, 20), (3, 30), (4, 15); + +-- One variable undefined (B auto-generated as "B IS TRUE") +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_auto +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ B*) + DEFINE A AS val > 15 +) +ORDER BY id; + +-- Multiple undefined variables +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_auto +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B C) + DEFINE A AS val > 0 + -- B and C auto-generated 
as "B IS TRUE", "C IS TRUE" +) +ORDER BY id; + +-- All variables defined explicitly +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_auto +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (X Y Z) + DEFINE + X AS val > 10, + Y AS val > 20, + Z AS val < 20 +) +ORDER BY id; + +DROP TABLE rpr_auto; + +-- Duplicate variable names +CREATE TABLE rpr_dup (id INT); +INSERT INTO rpr_dup VALUES (1), (2); + +-- Duplicate DEFINE entries +SELECT COUNT(*) OVER w +FROM rpr_dup +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS id > 0, A AS id < 10 +); +-- Expected: ERROR: row pattern definition variable name "a" appears more than once in DEFINE clause + +DROP TABLE rpr_dup; + +-- Boolean coercion +CREATE TABLE rpr_bool (id INT, flag BOOLEAN); +INSERT INTO rpr_bool VALUES (1, true), (2, false); + +-- Non-boolean expression +SELECT COUNT(*) OVER w +FROM rpr_bool +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS id +); +-- Expected: ERROR: argument of DEFINE must be type boolean + +-- Boolean column reference +SELECT id, flag, COUNT(*) OVER w as cnt +FROM rpr_bool +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (T+) + DEFINE T AS flag +) +ORDER BY id; + +-- NULL::boolean +SELECT id, COUNT(*) OVER w as cnt +FROM rpr_bool +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (N+) + DEFINE N AS NULL::boolean +) +ORDER BY id; + +DROP TABLE rpr_bool; + +-- Complex expressions +CREATE TABLE rpr_complex (id INT, val1 INT, val2 INT); +INSERT INTO rpr_complex VALUES (1, 10, 20), (2, 15, 25), (3, 20, 30); + +-- CASE expression +SELECT id, val1, val2, COUNT(*) OVER w as cnt +FROM rpr_complex +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (C+) + DEFINE C AS CASE WHEN val1 > 10 THEN val2 > 20 ELSE false END +) +ORDER BY 
id; + +DROP TABLE rpr_complex; + +-- Pattern variable not in PATTERN (should be ignored) +CREATE TABLE rpr_unused (id INT); +INSERT INTO rpr_unused VALUES (1), (2); + +-- Extra DEFINE variable +SELECT id, COUNT(*) OVER w as cnt +FROM rpr_unused +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS id > 0, B AS id > 5 -- B not in pattern +) +ORDER BY id; + +DROP TABLE rpr_unused; + +-- ============================================================ +-- FRAME Options Tests +-- ============================================================ + + +CREATE TABLE rpr_frame (id INT, val INT); +INSERT INTO rpr_frame VALUES + (1, 10), (2, 10), (3, 10), -- Same val: 10 + (4, 20), (5, 20), -- Same val: 20 + (6, 30); + +-- Valid frame options + +-- ROWS: counts physical rows (1 FOLLOWING = next 1 physical row) +-- Expected result: Each row can see 1 physical row ahead +-- id=1,2,3 (val=10): can see next row -> cnt=2 +-- id=4,5 (val=20): can see next row -> cnt=2 +-- id=6 (val=30): no next row -> cnt=1 +-- Result: [2,2,2,2,2,1] +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY val + ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B?) 
+ DEFINE A AS val >= 0, B AS val >= 0 +) +ORDER BY id; + +-- Invalid frame start positions + +-- Not starting at CURRENT ROW +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +-- Expected: ERROR: FRAME must start at current row when row pattern recognition is used + +-- EXCLUDE options + +-- EXCLUDE CURRENT ROW not permitted +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + EXCLUDE CURRENT ROW + PATTERN (A+) + DEFINE A AS val > 0 +); +-- Expected: ERROR: EXCLUDE options are not permitted when row pattern recognition is used + +-- EXCLUDE GROUP not permitted +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + EXCLUDE GROUP + PATTERN (A+) + DEFINE A AS val > 0 +); +-- Expected: ERROR: EXCLUDE options are not permitted when row pattern recognition is used + +-- EXCLUDE TIES not permitted +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + EXCLUDE TIES + PATTERN (A+) + DEFINE A AS val > 0 +); +-- Expected: ERROR: EXCLUDE options are not permitted when row pattern recognition is used + +-- RANGE frame not starting at CURRENT ROW +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +-- Expected: ERROR: FRAME option RANGE is not permitted when row pattern recognition is used + +-- GROUPS frame not starting at CURRENT ROW +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +-- Expected: ERROR: FRAME option GROUP is not permitted when row pattern recognition is used + +-- Starting with N PRECEDING +SELECT COUNT(*) OVER w +FROM 
rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +-- Expected: ERROR: FRAME must start at current row when row pattern recognition is used + +-- Starting with N FOLLOWING +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +-- Expected: ERROR: FRAME must start at current row when row pattern recognition is used + +-- Frame end bound edge cases + +-- End before start: CURRENT ROW AND 1 PRECEDING +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 1 PRECEDING + PATTERN (A+) + DEFINE A AS val > 0 +); +-- Expected: ERROR: frame starting from current row cannot have preceding rows + +-- End before start: CURRENT ROW AND UNBOUNDED PRECEDING +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED PRECEDING + PATTERN (A+) + DEFINE A AS val > 0 +); +-- Expected: ERROR: frame end cannot be UNBOUNDED PRECEDING + +-- Single row frame: CURRENT ROW AND CURRENT ROW +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND CURRENT ROW + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A AS val > 0 +) +ORDER BY id; + +-- Zero offset: CURRENT ROW AND 0 FOLLOWING (equivalent to CURRENT ROW) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 0 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A AS val > 0 +) +ORDER BY id; + +-- Large offset: CURRENT ROW AND 1000 FOLLOWING +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 1000 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + +-- Maximum offset: CURRENT ROW AND 2147483646 FOLLOWING (INT_MAX - 1) 
+SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2147483646 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + +-- RANGE frame with RPR (not permitted) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY val + RANGE BETWEEN CURRENT ROW AND 10 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B?) + DEFINE A AS val >= 0, B AS val >= 0 +) +ORDER BY id; +-- Expected: ERROR: FRAME option RANGE is not permitted when row pattern recognition is used + +-- GROUPS frame with RPR (not permitted) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY val + GROUPS BETWEEN CURRENT ROW AND 1 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B?) + DEFINE A AS val >= 0, B AS val >= 0 +) +ORDER BY id; +-- Expected: ERROR: FRAME option GROUP is not permitted when row pattern recognition is used + +DROP TABLE rpr_frame; + +-- ============================================================ +-- PARTITION BY + FRAME Tests +-- ============================================================ + +-- Test PARTITION BY with RPR to ensure proper partitioning behavior +CREATE TABLE rpr_partition (id INT, grp INT, val INT); +INSERT INTO rpr_partition VALUES + (1, 1, 10), (2, 1, 20), (3, 1, 30), + (4, 2, 15), (5, 2, 25), (6, 2, 35); + +-- PARTITION BY with ROWS frame +SELECT id, grp, val, COUNT(*) OVER w as cnt +FROM rpr_partition +WINDOW w AS ( + PARTITION BY grp + ORDER BY val + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+) + DEFINE A AS val >= 10, B AS val > 15 +) +ORDER BY id; +-- Expected: Pattern matching should reset for each partition + +-- PARTITION BY with RANGE frame +SELECT id, grp, val, COUNT(*) OVER w as cnt +FROM rpr_partition +WINDOW w AS ( + PARTITION BY grp + ORDER BY val + RANGE BETWEEN CURRENT ROW AND 10 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A 
B?) + DEFINE A AS val >= 10, B AS val >= 20 +) +ORDER BY id; +-- Expected: ERROR: FRAME option RANGE is not permitted when row pattern recognition is used + +DROP TABLE rpr_partition; + +-- ============================================================ +-- PATTERN Syntax Tests +-- ============================================================ + + +CREATE TABLE rpr_pattern (id INT, val INT); +INSERT INTO rpr_pattern VALUES + (1, 5), (2, 10), (3, 15), (4, 20), (5, 25), + (6, 30), (7, 35), (8, 40), (9, 45), (10, 50); + +-- Alternation (|) + +-- Multiple alternatives +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ | B+ | C+) + DEFINE A AS val > 35, B AS val BETWEEN 15 AND 35, C AS val < 15 +) +ORDER BY id; + +-- Grouping + +-- Nested grouping with quantifier +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B) C)+) + DEFINE A AS val > 10, B AS val > 20, C AS val > 30 +) +ORDER BY id; + +-- Sequence + +-- Multi-element sequence +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B C D E) + DEFINE + A AS val < 15, + B AS val BETWEEN 15 AND 25, + C AS val BETWEEN 25 AND 35, + D AS val BETWEEN 35 AND 45, + E AS val >= 45 +) +ORDER BY id; + +-- Complex combinations + +-- Alternation with grouping +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B) | (C D)) + DEFINE A AS val < 20, B AS val >= 20, C AS val < 30, D AS val >= 30 +) +ORDER BY id; + +-- Alternation + sequence + grouping +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (START (UP{2,} DOWN? 
| FLAT+) FINISH) + DEFINE + START AS val >= 0, + UP AS val > 20, + DOWN AS val <= 30, + FLAT AS val BETWEEN 25 AND 35, + FINISH AS val > 40 +) +ORDER BY id; + +-- Nested alternation in groups +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) (C | D)) + DEFINE A AS val < 15, B AS val BETWEEN 15 AND 25, C AS val BETWEEN 25 AND 35, D AS val > 35 +) +ORDER BY id; + +DROP TABLE rpr_pattern; + +-- ============================================================ +-- Quantifiers Tests +-- ============================================================ + + +CREATE TABLE rpr_quant (id INT, val INT); +INSERT INTO rpr_quant VALUES + (1, 10), (2, 20), (3, 30), (4, 40), (5, 50), + (6, 60), (7, 70), (8, 80), (9, 90), (10, 100); + +-- Basic greedy quantifiers + +-- * (zero or more) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A*) + DEFINE A AS val > 0 +) +ORDER BY id; + +-- + (one or more) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 50 +) +ORDER BY id; + +-- ? (zero or one) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A?) 
+ DEFINE A AS val = 50 +) +ORDER BY id; + +-- Edge case quantifiers + +-- {0} is not allowed (min must be >= 1) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{0} B) + DEFINE A AS val > 1000, B AS val > 0 +) +ORDER BY id; +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 + +-- {0,0} is not allowed (max must be >= 1) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{0,0} B) + DEFINE A AS val > 1000, B AS val > 0 +) +ORDER BY id; +-- Expected: ERROR: quantifier bounds must be between 0 and 2147483646 with max >= 1 + +-- {0,1} (equivalent to ?) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{0,1}) + DEFINE A AS val = 50 +) +ORDER BY id; + +-- Exact quantifiers {n} + +-- {3} (representative exact quantifier) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{3}) + DEFINE A AS val > 0 +) +ORDER BY id; + +-- Range quantifiers {n,} + +-- {2,} (representative n or more) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2,}) + DEFINE A AS val > 40 +) +ORDER BY id; + +-- Upper bound quantifiers {,m} + +-- {,3} (representative up to m) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,3}) + DEFINE A AS val > 0 +) +ORDER BY id; + +-- Range quantifiers {n,m} + +-- {3,7} (representative range) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{3,7}) + DEFINE A AS val > 0 +) 
+ORDER BY id; + +DROP TABLE rpr_quant; + +-- Reluctant quantifiers (not yet supported) +CREATE TABLE rpr_reluctant (id INT, val INT); +INSERT INTO rpr_reluctant VALUES (1, 10), (2, 20), (3, 30); + +-- *? (zero or more, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A*?) + DEFINE A AS val > 0 +); +-- Expected: ERROR: reluctant quantifiers are not yet supported + +-- +? (one or more, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+?) + DEFINE A AS val > 0 +); +-- Expected: ERROR: reluctant quantifiers are not yet supported + +-- ?? (zero or one, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A??) + DEFINE A AS val > 0 +); +-- Expected: ERROR: reluctant quantifiers are not yet supported + +-- {n,}? (n or more, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2,}?) + DEFINE A AS val > 0 +); +-- Expected: ERROR: reluctant quantifiers are not yet supported + +-- {n,m}? (n to m, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{1,3}?) + DEFINE A AS val > 0 +); +-- Expected: ERROR: reluctant quantifiers are not yet supported + +-- {n}? (exactly n, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2}?) + DEFINE A AS val > 0 +); +-- Expected: ERROR: reluctant quantifiers are not yet supported + +-- {,m}? (up to m, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,3}?) 
+ DEFINE A AS val > 0 +); +-- Expected: ERROR: reluctant quantifiers are not yet supported + +-- Invalid reluctant patterns (wrong token after quantifier) + +-- {2}+ (should be {2}? not {2}+) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2}+) + DEFINE A AS val > 0 +); +-- Expected: ERROR: syntax error at or near "+" + +-- {2,}* (should be {2,}? not {2,}*) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2,}*) + DEFINE A AS val > 0 +); +-- Expected: ERROR: syntax error at or near "*" + +-- {,3}* (should be {,3}? not {,3}*) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,3}*) + DEFINE A AS val > 0 +); +-- Expected: ERROR: syntax error at or near "*" + +-- {1,3}+ (should be {1,3}? not {1,3}+) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{1,3}+) + DEFINE A AS val > 0 +); +-- Expected: ERROR: syntax error at or near "+" + +-- Boundary errors in reluctant quantifiers + +-- {-1}? (negative bound) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{-1}?) + DEFINE A AS val > 0 +); +-- Expected: ERROR: syntax error at or near "-" + +-- {2147483647}? (INT_MAX) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2147483647}?) + DEFINE A AS val > 0 +); +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 + +-- {-1,}? (negative lower bound) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{-1,}?) 
+ DEFINE A AS val > 0 +); +-- Expected: ERROR: syntax error at or near "-" + +-- {2147483647,}? (INT_MAX lower bound) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2147483647,}?) + DEFINE A AS val > 0 +); +-- Expected: ERROR: quantifier bound must be between 0 and 2147483646 + +-- {,0}? (zero upper bound) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,0}?) + DEFINE A AS val > 0 +); +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 + +-- {,2147483647}? (INT_MAX upper bound) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,2147483647}?) + DEFINE A AS val > 0 +); +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 + +-- {-1,3}? (negative lower in range) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{-1,3}?) + DEFINE A AS val > 0 +); +-- Expected: ERROR: syntax error at or near "-" + +-- {1,2147483647}? (INT_MAX upper in range) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{1,2147483647}?) + DEFINE A AS val > 0 +); +-- Expected: ERROR: quantifier bounds must be between 0 and 2147483646 with max >= 1 + +-- {5,3}? (min > max) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{5,3}?) + DEFINE A AS val > 0 +); +-- Expected: ERROR: quantifier minimum bound must not exceed maximum + +-- Token-separated reluctant quantifiers (space between quantifier and ?) +-- These may be tokenized differently by the lexer + +-- * ? 
(token separated) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A* ?) + DEFINE A AS val > 0 +); +-- Expected: ERROR: reluctant quantifiers are not yet supported + +-- + ? (token separated) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ ?) + DEFINE A AS val > 0 +); +-- Expected: ERROR: reluctant quantifiers are not yet supported + +-- {2,} ? (token separated) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2,} ?) + DEFINE A AS val > 0 +); +-- Expected: ERROR: reluctant quantifiers are not yet supported + +-- Invalid token combinations + +-- * + (invalid combination) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A* +) + DEFINE A AS val > 0 +); +-- Expected: ERROR: syntax error at or near "+" + +-- + * (invalid combination) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ *) + DEFINE A AS val > 0 +); +-- Expected: ERROR: syntax error at or near "*" + +-- ? ? (token separated; expected to be rejected as reluctant ??, not as a syntax error) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A? ?) 
+ DEFINE A AS val > 0 +); +-- Expected: ERROR: reluctant quantifiers are not yet supported + +DROP TABLE rpr_reluctant; + +-- Quantifier boundary conditions + +CREATE TABLE rpr_bounds (id INT); +INSERT INTO rpr_bounds VALUES (1), (2); + +-- min > max +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{5,3}) + DEFINE A AS id > 0 +); +-- Expected: ERROR: quantifier minimum bound must not exceed maximum + +-- Large bounds +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{1000,2000}) + DEFINE A AS id > 0 +); + +-- Very large bound +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{100000}) + DEFINE A AS id > 0 +); + +-- INT_MAX - 1 = 2147483646 (at limit) +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2147483646}) + DEFINE A AS id > 0 +); + +-- INT_MAX = 2147483647 (over limit) +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2147483647}) + DEFINE A AS id > 0 +); +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 + +-- {n,} boundary errors + +-- Negative lower bound in {n,} +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{-1,}) + DEFINE A AS id > 0 +); +-- Expected: ERROR: syntax error at or near "-" + +-- INT_MAX in {n,} +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2147483647,}) + DEFINE A AS id > 0 +); +-- Expected: ERROR: quantifier bound must be between 0 and 2147483646 + +-- {,m} boundary errors + +-- Zero upper bound in {,m} +SELECT COUNT(*) OVER w +FROM 
rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,0}) + DEFINE A AS id > 0 +); +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 + +-- INT_MAX in {,m} +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,2147483647}) + DEFINE A AS id > 0 +); +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 + +DROP TABLE rpr_bounds; + +-- ============================================================ +-- Navigation Functions Tests (PREV / NEXT) +-- ============================================================ + + +CREATE TABLE rpr_nav (id INT, val INT); +INSERT INTO rpr_nav VALUES + (1, 10), (2, 20), (3, 15), (4, 25), (5, 30); + +-- PREV function - reference previous row in pattern +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_nav +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE + A AS val > 0, + B AS val > PREV(val) +) +ORDER BY id; + +-- NEXT function - reference next row in pattern +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_nav +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ B) + DEFINE + A AS val < NEXT(val), + B AS val > 0 +) +ORDER BY id; + +-- Combined PREV and NEXT +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_nav +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B C) + DEFINE + A AS val > 0, + B AS val > PREV(val) AND val < NEXT(val), + C AS val > PREV(val) +) +ORDER BY id; + +DROP TABLE rpr_nav; + +-- ============================================================ +-- SKIP TO / INITIAL Tests +-- ============================================================ + + +CREATE TABLE rpr_skip (id INT, val INT); +INSERT INTO rpr_skip VALUES + (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), + (6, 6), (7, 7), (8, 8); + +-- SKIP TO NEXT ROW + +-- SKIP TO NEXT ROW 
+SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_skip +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE A AS val > 0, B AS val > 2, C AS val > 4 +) +ORDER BY id; + +-- SKIP PAST LAST ROW + +-- SKIP PAST LAST ROW +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_skip +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS val > 0, B AS val > 2, C AS val > 4 +) +ORDER BY id; + +-- Default behavior (should be SKIP PAST LAST ROW) + +-- No SKIP TO clause (default) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_skip +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B) + DEFINE A AS val > 0, B AS val > 1 +) +ORDER BY id; + +-- Compare default with explicit PAST LAST ROW +-- Results should be identical +WITH default_skip AS ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_skip + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B C) + DEFINE A AS val > 0, B AS val > 2, C AS val > 4 + ) +), +explicit_skip AS ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_skip + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS val > 0, B AS val > 2, C AS val > 4 + ) +) +SELECT 'default' as type, * FROM default_skip +UNION ALL +SELECT 'explicit' as type, * FROM explicit_skip +ORDER BY type, id; + +DROP TABLE rpr_skip; + +-- INITIAL clause + +CREATE TABLE rpr_init (id INT, val INT); +INSERT INTO rpr_init VALUES (1, 10), (2, 20), (3, 30), (4, 40); + +-- Explicit INITIAL +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_init +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + +-- Implicit INITIAL (default) +SELECT id, val, COUNT(*) OVER 
w as cnt +FROM rpr_init +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + +DROP TABLE rpr_init; + +-- SEEK + +CREATE TABLE rpr_seek (id INT, val INT); +INSERT INTO rpr_seek VALUES (1, 10); + +-- SEEK keyword +SELECT COUNT(*) OVER w +FROM rpr_seek +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + SEEK + PATTERN (A+) + DEFINE A AS val > 0 +); +-- Expected: ERROR: SEEK is not supported +-- HINT: Use INITIAL instead. + +DROP TABLE rpr_seek; + +-- ============================================================ +-- Serialization/Deserialization Tests +-- ============================================================ + + +-- View creation and deparsing + +CREATE TABLE rpr_serial (id INT, val INT); +INSERT INTO rpr_serial VALUES + (1, 10), (2, 20), (3, 15), (4, 25), (5, 30); + +-- Simple pattern +CREATE VIEW rpr_serial_v1 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); + +-- Verify view works (tests deserialization) +SELECT * FROM rpr_serial_v1 ORDER BY id; + +-- Verify deparsing +SELECT pg_get_viewdef('rpr_serial_v1'::regclass); + +DROP VIEW rpr_serial_v1; + +-- Complex pattern with alternation +CREATE VIEW rpr_serial_v2 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ | B*) + DEFINE A AS val > 20, B AS val <= 20 +); + +SELECT * FROM rpr_serial_v2 ORDER BY id; +SELECT pg_get_viewdef('rpr_serial_v2'::regclass); + +DROP VIEW rpr_serial_v2; + +-- Pattern with grouping and quantifiers +CREATE VIEW rpr_serial_v3 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B){2,5} | C*) + DEFINE + A AS val > 10, + B AS val > 20, + C AS val <= 
10 +); + +SELECT * FROM rpr_serial_v3 ORDER BY id; +SELECT pg_get_viewdef('rpr_serial_v3'::regclass); + +DROP VIEW rpr_serial_v3; + +-- All features combined +CREATE VIEW rpr_serial_v4 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (START (MID{1,3} | ALT+) FINISH) + DEFINE + START AS val > 5, + MID AS val BETWEEN 10 AND 25, + ALT AS val > 25, + FINISH AS val > 15 +); + +SELECT * FROM rpr_serial_v4 ORDER BY id; +SELECT pg_get_viewdef('rpr_serial_v4'::regclass); + +DROP VIEW rpr_serial_v4; + +-- Additional quantifiers for deparsing coverage + +-- ? quantifier (zero or one) +CREATE VIEW rpr_serial_v5 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B?) + DEFINE A AS val > 10, B AS val > 20 +); + +SELECT * FROM rpr_serial_v5 ORDER BY id; +SELECT pg_get_viewdef('rpr_serial_v5'::regclass); + +DROP VIEW rpr_serial_v5; + +-- {n,} quantifier (n or more) +CREATE VIEW rpr_serial_v6 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2,}) + DEFINE A AS val > 15 +); + +SELECT * FROM rpr_serial_v6 ORDER BY id; +SELECT pg_get_viewdef('rpr_serial_v6'::regclass); + +DROP VIEW rpr_serial_v6; + +-- {n} quantifier (exactly n) +CREATE VIEW rpr_serial_v7 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{3}) + DEFINE A AS val > 0 +); + +SELECT * FROM rpr_serial_v7 ORDER BY id; +SELECT pg_get_viewdef('rpr_serial_v7'::regclass); + +DROP VIEW rpr_serial_v7; + +-- Nested ALT pattern (tests deparse of complex nested structure) +CREATE VIEW rpr_serial_v8 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS 
BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A+ B) | C) D | A B C) + DEFINE A AS val <= 15, B AS val <= 25, C AS val <= 30, D AS val > 30 +); + +SELECT * FROM rpr_serial_v8 ORDER BY id; +SELECT pg_get_viewdef('rpr_serial_v8'::regclass); + +DROP VIEW rpr_serial_v8; + +DROP TABLE rpr_serial; + +-- Materialized view (if supported) + +CREATE TABLE rpr_mview (id INT, val INT); +INSERT INTO rpr_mview VALUES (1, 10), (2, 20), (3, 30); + +CREATE MATERIALIZED VIEW rpr_mview_v1 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_mview +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); + +SELECT * FROM rpr_mview_v1 ORDER BY id; +SELECT pg_get_viewdef('rpr_mview_v1'::regclass); + +-- Refresh test +REFRESH MATERIALIZED VIEW rpr_mview_v1; +SELECT * FROM rpr_mview_v1 ORDER BY id; + +DROP MATERIALIZED VIEW rpr_mview_v1; +DROP TABLE rpr_mview; + +-- Prepared statements (tests outfuncs.c / readfuncs.c) + +CREATE TABLE rpr_prep (id INT, val INT); +INSERT INTO rpr_prep VALUES (1, 10), (2, 20), (3, 30); + +-- Simple prepared statement +PREPARE rpr_prep_simple AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_prep +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + +EXECUTE rpr_prep_simple; +EXECUTE rpr_prep_simple; + +DEALLOCATE rpr_prep_simple; + +-- Prepared statement with parameters +PREPARE rpr_prep_param(int) AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_prep +WHERE id <= $1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 10 +) +ORDER BY id; + +EXECUTE rpr_prep_param(2); +EXECUTE rpr_prep_param(3); + +DEALLOCATE rpr_prep_param; + +-- Complex prepared statement +PREPARE rpr_prep_complex AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_prep +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP 
TO NEXT ROW + PATTERN ((A B){1,2} | C+) + DEFINE + A AS val > 5, + B AS val > 15, + C AS val <= 15 +) +ORDER BY id; + +EXECUTE rpr_prep_complex; +EXECUTE rpr_prep_complex; + +DEALLOCATE rpr_prep_complex; + +DROP TABLE rpr_prep; + +-- CTE and Subquery (tests copyfuncs.c) + +CREATE TABLE rpr_copy (id INT, val INT); +INSERT INTO rpr_copy VALUES (1, 10), (2, 20), (3, 30), (4, 40); + +-- Simple CTE +WITH rpr_cte AS ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_copy + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) +SELECT * FROM rpr_cte ORDER BY id; + +-- CTE with multiple references (forces node copy) +WITH rpr_cte AS ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_copy + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 15 + ) +) +SELECT c1.id, c1.cnt as cnt1, c2.cnt as cnt2 +FROM rpr_cte c1 +JOIN rpr_cte c2 ON c1.id = c2.id +ORDER BY c1.id; + +-- Subquery in FROM clause +SELECT * +FROM ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_copy + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B?) 
+ DEFINE A AS val > 10, B AS val > 20 + ) +) sub +WHERE cnt > 0 +ORDER BY id; + +-- Nested subqueries +SELECT * +FROM ( + SELECT * + FROM ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_copy + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val >= 10 + ) + ) inner_sub + WHERE cnt > 0 +) outer_sub +ORDER BY id; + +DROP TABLE rpr_copy; + +-- DISTINCT and set operations (tests equalfuncs.c) + +CREATE TABLE rpr_equal (id INT, val INT); +INSERT INTO rpr_equal VALUES (1, 10), (2, 20), (3, 10), (4, 20); + +-- DISTINCT with RPR +SELECT DISTINCT cnt +FROM ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_equal + WINDOW w AS ( + ORDER BY val + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub +ORDER BY cnt; + +-- UNION with RPR in both sides +SELECT id, val, cnt FROM ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_equal + WHERE val = 10 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub1 +UNION +SELECT id, val, cnt FROM ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_equal + WHERE val = 20 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub2 +ORDER BY id; + +-- UNION ALL +SELECT id, cnt FROM ( + SELECT id, COUNT(*) OVER w as cnt + FROM rpr_equal + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 10 + ) +) sub +UNION ALL +SELECT id, cnt FROM ( + SELECT id, COUNT(*) OVER w as cnt + FROM rpr_equal + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (B+) + DEFINE B AS val <= 10 + ) +) sub +ORDER BY id, cnt; + +-- INTERSECT +SELECT id, cnt FROM ( + SELECT id, COUNT(*) OVER w as cnt + FROM rpr_equal + WHERE id <= 3 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT 
ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub1 +INTERSECT +SELECT id, cnt FROM ( + SELECT id, COUNT(*) OVER w as cnt + FROM rpr_equal + WHERE id >= 2 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub2 +ORDER BY id; + +DROP TABLE rpr_equal; + +-- View with multiple window definitions + +CREATE TABLE rpr_multiwin (id INT, val INT); +INSERT INTO rpr_multiwin VALUES (1, 10), (2, 20), (3, 30); + +CREATE VIEW rpr_multiwin_v AS +SELECT + id, + val, + COUNT(*) OVER w1 as cnt1, + COUNT(*) OVER w2 as cnt2 +FROM rpr_multiwin +WINDOW + w1 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 15 + ), + w2 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (B*) + DEFINE B AS val <= 15 + ); + +SELECT * FROM rpr_multiwin_v ORDER BY id; +SELECT pg_get_viewdef('rpr_multiwin_v'::regclass); + +DROP VIEW rpr_multiwin_v; +DROP TABLE rpr_multiwin; + +-- ============================================================ +-- Error Cases Tests +-- ============================================================ + + +DROP TABLE IF EXISTS rpr_err; +CREATE TABLE rpr_err (id INT, val INT); +INSERT INTO rpr_err VALUES (1, 10), (2, 20); + +-- Syntax errors + +-- Invalid quantifier syntax +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+!) 
+ DEFINE A AS val > 0 +); +-- Expected: Syntax error + +-- Unmatched parentheses +SET client_min_messages = NOTICE; +DO $$ +BEGIN + EXECUTE 'SELECT COUNT(*) OVER w FROM rpr_err WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A B) DEFINE A AS val > 0, B AS val > 10)'; + RAISE NOTICE 'Unmatched parentheses: UNEXPECTED SUCCESS'; +EXCEPTION + WHEN syntax_error THEN + RAISE NOTICE 'Unmatched parentheses: EXPECTED ERROR - %', SQLERRM; + WHEN OTHERS THEN + RAISE NOTICE 'Unmatched parentheses: UNEXPECTED ERROR - %', SQLERRM; +END $$; +SET client_min_messages = WARNING; + +-- Empty DEFINE +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE +); +-- Expected: Syntax error + +-- Empty PATTERN +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN () + DEFINE A AS val > 0 +); +-- Expected: Syntax error + +-- DEFINE without PATTERN (PATTERN and DEFINE must be used together) +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + DEFINE A AS val > 0 +); +-- Expected: Syntax error + +-- Qualified column references (NOT SUPPORTED) +-- Pattern variables in DEFINE clause cannot use qualified references (A.price) +-- This gives a confusing error about missing FROM-clause entry + +-- Qualified reference in DEFINE clause +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS A.val > 0 +); +-- Expected: ERROR: missing FROM-clause entry for table "a" + +-- Semantic errors + +-- Undefined column in DEFINE +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS nonexistent_column > 0 +); +-- Expected: ERROR: column "nonexistent_column" does not exist + 
+-- Type mismatch +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 'string' +); +-- Expected: ERROR: invalid input syntax for type integer: "string" + +-- Aggregate function in DEFINE (if not allowed) +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS COUNT(*) > 0 +); +-- Expected: ERROR or works depending on implementation + +-- Subquery in DEFINE (NOT SUPPORTED) +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > (SELECT max(val) FROM rpr_err) +); +-- Expected: ERROR: cannot use subquery in DEFINE expression + +-- Edge cases + +-- Pattern variable not used (should work, extra vars ignored) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0, B AS val > 5, C AS val > 10 +) +ORDER BY id; + +DROP TABLE rpr_err; + +-- NULL handling + +CREATE TABLE rpr_null (id INT, val INT); +INSERT INTO rpr_null VALUES (1, 10), (2, NULL), (3, 30); + +-- NULL in DEFINE expression +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_null +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 15 +) +ORDER BY id; + +-- IS NULL in DEFINE +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_null +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (N+) + DEFINE N AS val IS NULL +) +ORDER BY id; + +-- IS NOT NULL in DEFINE +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_null +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (NN+) + DEFINE NN AS val IS NOT NULL +) +ORDER BY id; + +DROP TABLE rpr_null; + +-- 
============================================================ +-- Pattern Optimization Tests +-- ============================================================ +-- Tests for pattern optimization in optimizer/plan/rpr.c +-- Use EXPLAIN to verify optimized pattern (shown as "Pattern: ...") + +CREATE TABLE rpr_plan (id INT, val INT); +INSERT INTO rpr_plan VALUES + (1, 10), (2, 20), (3, 30), (4, 40), (5, 50), + (6, 60), (7, 70), (8, 80), (9, 90), (10, 100); + +-- Consecutive VAR merge: A A A -> a{3} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A A A) DEFINE A AS val > 0); + +-- Consecutive VAR merge: A{2} A{3} -> a{5} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2} A{3}) DEFINE A AS val > 0); + +-- Consecutive VAR merge: A+ A* -> a+ +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ A*) DEFINE A AS val > 0); + +-- Consecutive VAR merge: A A+ -> a{2,} +-- Tests line 251: child->max == RPR_QUANTITY_INF branch in mergeConsecutiveVars +-- prev: A{1,1} (finite), child: A+ (infinite) triggers line 251 evaluation +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A A+) DEFINE A AS val > 0); + +-- Consecutive GROUP merge with finite quantifiers: ((A B){5}) ((A B){10}) -> merged +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B){5}) ((A B){10})) DEFINE A AS val <= 50, B AS val > 50); + +-- Consecutive GROUP merge with unbounded: (A B)+ (A B)+ -> (a b){2,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN 
((A B)+ (A B)+) DEFINE A AS val <= 50, B AS val > 50); + +-- Consecutive GROUP merge: (A B){2} (A B)+ -> (a b){3,} +-- Tests line 325: child->max == RPR_QUANTITY_INF branch in mergeConsecutiveGroups +-- prev: (A B){2,2} (finite), child: (A B)+ (infinite) triggers line 325 evaluation +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B){2} (A B)+) DEFINE A AS val <= 50, B AS val > 50); + +-- PREFIX merge: A B (A B)+ -> (a b){2,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (A B)+) DEFINE A AS val <= 50, B AS val > 50); + +-- PREFIX and SUFFIX merge: A B (A B)+ A B -> (a b){3,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (A B)+ A B) DEFINE A AS val <= 40, B AS val > 40); + +-- Flatten nested: A ((B) (C)) -> a b c +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B) (C))) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + +-- ALT flatten: (A | (B | C))+ -> (a | b | c)+ +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | (B | C))+) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + +-- ALT deduplicate: (A | B | A) -> (a | b) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B | A)+) DEFINE A AS val <= 50, B AS val > 50); + +-- Quantifier multiply: (A{2}){3} -> a{6} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}){3}) DEFINE A AS val > 0); + +-- Quantifier multiply 
with child range: (A{2,3}){3} -> a{6,9} +-- outer exact, child range - optimization applies +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2,3}){3}) DEFINE A AS val > 0); + +-- Quantifier NO multiply: (A{2}){2,3} stays as (a{2}){2,3} +-- outer range - gaps would occur (4,6 not 4,5,6), no optimization +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}){2,3}) DEFINE A AS val > 0); + +-- Quantifier NO multiply: (A{2}){2,} stays as (a{2}){2,} +-- outer unbounded - gaps would occur (4,6,8,... not 4,5,6,...), no optimization +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}){2,}) DEFINE A AS val > 0); + +-- Quantifier multiply: (A){2,} -> a{2,} +-- child exact 1 - no gaps, optimization applies +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A){2,}) DEFINE A AS val > 0); + +-- Quantifier multiply: (A)+ -> a+ +-- child exact 1 - no gaps, optimization applies +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A)+) DEFINE A AS val > 0); + +-- Quantifier NO multiply: (A{2}){3,5} stays as (a{2}){3,5} +-- outer range, child exact > 1 - gaps would occur (6,8,10 not 6,7,8,9,10) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}){3,5}) DEFINE A AS val > 0); + +-- Quantifier NO multiply: (A{2,3}){2,3} stays as (a{2,3}){2,3} +-- outer range, child range - gaps possible (e.g., (A{4,5}){2,3} misses 11) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN 
CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2,3}){2,3}) DEFINE A AS val > 0); + +-- Nested unbounded: (A*)* -> a* +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A*)*) DEFINE A AS val > 0); + +-- Nested unbounded: (A+)* -> a* +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+)*) DEFINE A AS val > 0); + +-- Nested unbounded: (A+)+ -> a+ +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+)+) DEFINE A AS val > 0); + +-- Unwrap GROUP{1,1}: (A) -> a +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A)) DEFINE A AS val > 0); + +-- Unwrap GROUP{1,1}: (A B) -> a b +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)) DEFINE A AS val <= 50, B AS val > 50); + +-- Combined optimization: A A (B B)+ B B C C C -> a{2} (b{2}){2,} c{3} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A A (B B)+ B B C C C) + DEFINE A AS val <= 20, B AS val > 20 AND val <= 70, C AS val > 70); + +-- Consecutive GROUP merge with unbounded: (A+) (A+) -> a{2,} +-- Tests mergeConsecutiveGroups with child->max == INF +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+) (A+)) DEFINE A AS val > 0); + +-- Consecutive GROUP merge finite: (A{10}){20} -> a{200} +-- Tests mergeConsecutiveGroups with both finite +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED 
FOLLOWING + PATTERN ((A{10}){20}) DEFINE A AS val > 0); + +-- Different GROUP prevents merge: (A B){2} (C D){3} +-- Tests mergeConsecutiveGroups flush previous +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B){2} (C D){3}) + DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, + C AS val > 50 AND val <= 75, D AS val > 75); + +-- Different children count prevents merge: (A B)+ (A B C)+ +-- Tests rprPatternChildrenEqual length check +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+ (A B C)+) + DEFINE A AS val <= 33, B AS val > 33 AND val <= 66, C AS val > 66); + +-- PREFIX only merge: A B (A B)+ -> (a b){2,} +-- Tests mergeGroupPrefixSuffix: absorb preceding elements into GROUP min +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (A B)+) DEFINE A AS val <= 50, B AS val > 50); + +-- SUFFIX only merge: (A B)+ A B -> (a b){2,} +-- Tests mergeGroupPrefixSuffix: absorb following elements into GROUP min +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+ A B) DEFINE A AS val <= 50, B AS val > 50); + +-- Multiple SUFFIX absorption with skipUntil: (A B)+ A B A B C +-- Tests mergeGroupPrefixSuffix: skip absorbed suffix elements +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+ A B A B C) + DEFINE A AS val <= 50, B AS val > 50 AND val <= 75, C AS val > 75); + +-- PREFIX merge with remaining prefix: A B C D (C D)+ +-- Tests mergeGroupPrefixSuffix: trimmed list reconstruction +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW 
AND UNBOUNDED FOLLOWING + PATTERN (A B C D (C D)+) + DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, + C AS val > 50 AND val <= 75, D AS val > 75); + +-- PREFIX merge with quantifiers: A B* (A B*)+ -> (a b*){2,} +-- Tests mergeGroupPrefixSuffix: quantifier comparison in rprPatternEqual +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B* (A B*)+) + DEFINE A AS val <= 50, B AS val > 50); + +-- PREFIX merge with multiple quantifiers: A+ B* C? (A+ B* C?)+ -> (a+ b* c?){2,} +-- Tests mergeGroupPrefixSuffix: complex quantifier patterns +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ B* C? (A+ B* C?)+) + DEFINE A AS val <= 30, B AS val > 30 AND val <= 60, C AS val > 60); + +-- SUFFIX merge with quantifiers: (A B*)+ A B* -> (a b*){2,} +-- Tests mergeGroupPrefixSuffix: suffix with quantifiers +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B*)+ A B*) + DEFINE A AS val <= 50, B AS val > 50); + +-- Unwrap GROUP{1,1}: ((A | B | C)) -> (a | b | c) +-- Tests tryUnwrapGroup removing redundant outer GROUP +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B | C)) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + +-- ============================================================ +-- Absorption Flag Display Tests +-- ============================================================ +-- Tests absorption marker display in EXPLAIN output +-- Markers: ' = branch element, " = judgment point +-- Files: explain.c (append_rpr_quantifier, deparse_rpr_pattern) + +-- Simple VAR: A+ -> a+" (judgment point) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS 
BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+) DEFINE A AS val > 0); + +-- GROUP unbounded: (A B)+ -> (a' b')+" (branch + judgment) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A B)+) DEFINE A AS val <= 50, B AS val > 50); + +-- ALT both absorbable: A+ | B+ -> (a+" | b+") +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+ | B+) DEFINE A AS val <= 50, B AS val > 50); + +-- ALT one absorbable: A+ | B -> (a+" | b) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+ | B) DEFINE A AS val <= 50, B AS val > 50); + +-- Sequence with absorbable start: A+ B -> a+" b +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+ B) DEFINE A AS val <= 50, B AS val > 50); + +-- Complex nested: ((A+ B) | C) D | A B C - deeply nested ALT +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (((A+ B) | C) D | A B C) + DEFINE A AS val <= 30, B AS val <= 60, C AS val <= 80, D AS val > 80); + +-- Nested unbounded: (A+ | B)+ -> (a+" | b)+ (first iteration absorbable) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A+ | B)+) + DEFINE A AS val <= 50, B AS val > 50); + +-- ALT inside unbounded GROUP: (A+ B | A B)* -> (a+" b | a b)* (first iteration absorbable) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w 
FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A+ B | A B)*) + DEFINE A AS val <= 50, B AS val > 50); + +-- Non-absorbable (unbounded not at start): A B+ -> a b+ (no markers) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A B+) DEFINE A AS val <= 50, B AS val > 50); + +-- Non-absorbable (no unbounded branch): (A | B){2,} -> (a | b){2,} (no markers) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A | B){2,}) DEFINE A AS val <= 50, B AS val > 50); + +-- Non-absorbable (SKIP TO NEXT ROW): A+ -> a+ (no markers) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW PATTERN (A+) DEFINE A AS val > 0); + +-- Non-absorbable (limited frame): A+ -> a+ (no markers) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND 10 FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+) DEFINE A AS val > 0); + +-- ============================================================ +-- Absorption Analysis Tests +-- ============================================================ +-- Tests context absorption optimization (O(n^2) -> O(n)) +-- Files: rpr.c (computeAbsorbability) + +-- Simple Absorbable Pattern: A+ B +-- Pattern starts with unbounded VAR + +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + +-- Absorbable GROUP Pattern: (A B)+ C +-- Pattern starts with unbounded GROUP + +SELECT id, val, COUNT(*) 
OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C) + DEFINE A AS val <= 30, B AS val > 30 AND val <= 60, C AS val > 60 +) +ORDER BY id; + +-- Non-Absorbable: Unbounded Not at Start +-- Pattern: A B+ (unbounded not at start) + +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + +-- ALT with Absorbable Branches +-- Pattern: (A+ | B+) C - both branches absorbable + +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A+ | B+) C) + DEFINE A AS val <= 30, B AS val > 30 AND val <= 60, C AS val > 60 +) +ORDER BY id; + +-- ALT with Mixed Branches +-- Pattern: (A+ | B C) - only first branch absorbable + +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A+ | B C)+) + DEFINE A AS val <= 30, B AS val > 30 AND val <= 60, C AS val > 60 +) +ORDER BY id; + +-- Non-Absorbable: ALT Inside GROUP +-- Pattern: (A | B){2,} - ALT inside unbounded GROUP + +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B){2,}) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + +-- Non-Absorbable: Nested Unbounded +-- Pattern: ((A B)+ C)+ - nested GROUP structure + +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B)+ C)+) + DEFINE A AS val <= 30, B AS val > 30 AND val <= 60, C AS val > 
60 +) +ORDER BY id; + +-- Non-Absorbable: Unbounded Element Inside GROUP +-- Pattern: (A B+){2,} - unbounded inside GROUP + +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B+){2,}) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + +-- Runtime Conditions: SKIP TO NEXT ROW +-- Absorption disabled with SKIP TO NEXT ROW + +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + +-- Runtime Conditions: Limited Frame +-- Absorption disabled with limited frame end + +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + +-- ============================================================ +-- Edge Case Tests +-- ============================================================ +-- Tests boundary conditions and complex scenarios + +-- Empty Match Prevention +-- Pattern that could match empty: A* + +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A*) + DEFINE A AS val > 1000 -- Never matches +) +ORDER BY id; + +-- All Rows Match +-- Pattern where every row matches + +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val >= 0 -- Always true +) +ORDER BY id; + +-- Large Quantifiers +-- Pattern: A{100} (large exact quantifier) + +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{100}) + 
DEFINE A AS val > 0 +) +ORDER BY id; + +-- Pattern: A{10,20} (large range quantifier) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{10,20}) + DEFINE A AS val > 0 +) +ORDER BY id; + +-- Complex Multi-Level Nesting +-- Pattern: (((A B) | C)+ D)+ + +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((((A B) | C)+ D)+) + DEFINE A AS val <= 20, B AS val > 20 AND val <= 40, + C AS val > 40 AND val <= 60, D AS val > 60 +) +ORDER BY id; + +-- Long Alternation Chain +-- Pattern: A | B | C | D | E (5-way ALT) + +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A | B | C | D | E) + DEFINE A AS val = 10, B AS val = 30, C AS val = 50, + D AS val = 70, E AS val = 90 +) +ORDER BY id; + +-- Long Sequence +-- Pattern: A B C D E F G H (8-element SEQ) + +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B C D E F G H) + DEFINE A AS val >= 10, B AS val >= 20, C AS val >= 30, + D AS val >= 40, E AS val >= 50, F AS val >= 60, + G AS val >= 70, H AS val >= 80 +) +ORDER BY id; + +-- Interleaved Quantifiers +-- Pattern: A{2} B+ C{3,5} D* E{1,} + +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2} B+ C{3,5} D* E{1,}) + DEFINE A AS val > 0, B AS val > 0, C AS val > 0, + D AS val > 0, E AS val > 0 +) +ORDER BY id; + +-- ============================================================ +-- Optimization Fallback Tests +-- ============================================================ +-- Tests for optimization edge cases and fallback behavior + +CREATE TABLE rpr_fallback (id INT, val INT); +INSERT INTO rpr_fallback VALUES 
(1, 10), (2, 20); + +-- Test: min quantifier overflow causes optimization fallback (min == max case) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_fallback +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2000000000}){2}) + DEFINE A AS val > 0 +); +-- Expected: Fallback - pattern not merged due to min overflow (4000000000 > INT32_MAX) + +-- Test: max-only quantifier overflow causes optimization fallback +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_fallback +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{1,2000000000}){2}) + DEFINE A AS val > 0 +); +-- Expected: Fallback - min OK (2*1=2), but max overflow (2*2000000000 > INT32_MAX) + +-- Test: max quantifier exceeds valid range (2147483647 = INT_MAX, limit is 2147483646) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_fallback +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2000000000,2147483647}){2}) + DEFINE A AS val > 0 +); +-- Expected: ERROR at parse time before optimization + +-- Test: nested unbounded with large min causes overflow fallback +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_fallback +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2000000000,}){2000000000,}) + DEFINE A AS val > 0 +); +-- Expected: Fallback - min overflow (2000000000 * 2000000000 > INT32_MAX) + +-- Test: prefix mismatch causes optimization fallback +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_fallback +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (C D)+) + DEFINE A AS val > 0, B AS val > 5, C AS val > 10, D AS val > 15 +); +-- Expected: Fallback - prefix elements don't match GROUP content + +DROP TABLE rpr_fallback; + +-- ============================================================ +-- Planner Integration Tests +-- 
============================================================ +-- Tests full planning pipeline and WindowAgg plan node creation +-- Files: planner.c, createplan.c + +CREATE TABLE rpr_planner (id INT, category VARCHAR(10), val INT); +INSERT INTO rpr_planner VALUES + (1, 'A', 10), (2, 'A', 20), (3, 'A', 30), + (4, 'B', 40), (5, 'B', 50), (6, 'B', 60), + (7, 'C', 70), (8, 'C', 80), (9, 'C', 90); + +-- Multiple Window Functions in Same Query +SELECT id, category, val, + COUNT(*) OVER w1 as cnt1, + COUNT(*) OVER w2 as cnt2 +FROM rpr_planner +WINDOW w1 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +), +w2 AS ( + PARTITION BY category + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (B+) + DEFINE B AS val >= 40 +) +ORDER BY id; + +-- Window Function with PARTITION BY + +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_planner +WINDOW w AS ( + PARTITION BY category + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY category, id; + +-- Window Function with Complex ORDER BY + +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_planner +WINDOW w AS ( + ORDER BY category DESC, val ASC + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY category DESC, val ASC; + +-- Named Window Reference + +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_planner +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + +-- Inline Window Definition + +SELECT id, category, val, + COUNT(*) OVER ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) as cnt +FROM rpr_planner +ORDER BY id; + +-- Window with Aggregate Functions +SELECT category, + COUNT(*) OVER w as window_cnt, + COUNT(*) as agg_cnt +FROM rpr_planner +WINDOW w AS ( + 
PARTITION BY category + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +GROUP BY category +ORDER BY category; +-- Expected: ERROR (GROUP BY with window RPR not supported) + +-- ============================================================ +-- Subquery and CTE Tests +-- Files: planner.c, prepjointree.c +-- ============================================================ +-- Tests RPR with subqueries and CTEs + +-- RPR in Subquery (FROM clause) + +SELECT * FROM ( + SELECT id, category, val, + COUNT(*) OVER w as cnt + FROM rpr_planner + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub +WHERE cnt > 5 +ORDER BY id; + +-- RPR with Subquery in WHERE + +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_planner +WHERE val > (SELECT AVG(val) FROM rpr_planner) +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 50 +) +ORDER BY id; + +-- CTE with RPR + +WITH rpr_cte AS ( + SELECT id, category, val, + COUNT(*) OVER w as cnt + FROM rpr_planner + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) +SELECT * FROM rpr_cte WHERE cnt > 5 ORDER BY id; + +-- Multiple CTE References + +WITH rpr_cte AS ( + SELECT id, category, val, + COUNT(*) OVER w as cnt + FROM rpr_planner + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) +SELECT c1.id, c1.cnt, c2.cnt as cnt2 +FROM rpr_cte c1 +JOIN rpr_cte c2 ON c1.id = c2.id +ORDER BY c1.id; + +-- Nested CTEs + +WITH cte1 AS ( + SELECT id, category, val FROM rpr_planner WHERE val > 30 +), +cte2 AS ( + SELECT id, category, val, + COUNT(*) OVER w as cnt + FROM cte1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) +SELECT * FROM cte2 
ORDER BY id; + +-- ============================================================ +-- JOIN Tests +-- Files: prepjointree.c, setrefs.c +-- ============================================================ +-- Tests RPR with JOINs and multiple table references + +CREATE TABLE rpr_join1 (id INT, val1 INT); +CREATE TABLE rpr_join2 (id INT, val2 INT); + +INSERT INTO rpr_join1 VALUES (1, 10), (2, 20), (3, 30), (4, 40), (5, 50); +INSERT INTO rpr_join2 VALUES (1, 100), (2, 200), (3, 300), (4, 400), (5, 500); + +-- RPR After INNER JOIN + +SELECT t1.id, t1.val1, t2.val2, + COUNT(*) OVER w as cnt +FROM rpr_join1 t1 +INNER JOIN rpr_join2 t2 ON t1.id = t2.id +WINDOW w AS ( + ORDER BY t1.id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val1 + val2 > 100 +) +ORDER BY t1.id; + +-- RPR After LEFT JOIN + +SELECT t1.id, t1.val1, t2.val2, + COUNT(*) OVER w as cnt +FROM rpr_join1 t1 +LEFT JOIN rpr_join2 t2 ON t1.id = t2.id +WINDOW w AS ( + ORDER BY t1.id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val1 > 0 +) +ORDER BY t1.id; + +-- RPR with Multiple Tables in DEFINE + +SELECT t1.id, t1.val1, t2.val2, + COUNT(*) OVER w as cnt +FROM rpr_join1 t1 +INNER JOIN rpr_join2 t2 ON t1.id = t2.id +WINDOW w AS ( + ORDER BY t1.id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ B) + DEFINE A AS t1.val1 > 20, + B AS t2.val2 > 200 +) +ORDER BY t1.id; + +-- RPR After Cross Join + +SELECT t1.id as id1, t2.id as id2, t1.val1, t2.val2, + COUNT(*) OVER w as cnt +FROM rpr_join1 t1 +CROSS JOIN rpr_join2 t2 +WHERE t1.id <= 2 AND t2.id <= 2 +WINDOW w AS ( + ORDER BY t1.id, t2.id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val1 + val2 > 0 +) +ORDER BY t1.id, t2.id; + +-- Self-Join with RPR + +SELECT a.id, a.val1, b.val1 as val1_next, + COUNT(*) OVER w as cnt +FROM rpr_join1 a +INNER JOIN rpr_join1 b ON a.id + 1 = b.id +WINDOW w AS ( + ORDER BY a.id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING 
+ PATTERN (X+) + DEFINE X AS a.val1 < b.val1 +) +ORDER BY a.id; + +DROP TABLE rpr_join1, rpr_join2; + +-- ============================================================ +-- Complex Expression Tests +-- Files: createplan.c, setrefs.c +-- ============================================================ +-- Tests complex target list expressions + +CREATE TABLE rpr_target (id INT, val INT); +INSERT INTO rpr_target VALUES (1, 10), (2, 20), (3, 30), (4, 40), (5, 50); + +-- Expressions in Target List + +SELECT id, + val * 2 as doubled, + val + 10 as added, + COUNT(*) OVER w as cnt +FROM rpr_target +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + +-- CASE Expression in Target List + +SELECT id, val, + CASE + WHEN val < 30 THEN 'low' + WHEN val < 50 THEN 'medium' + ELSE 'high' + END as category, + COUNT(*) OVER w as cnt +FROM rpr_target +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + +-- Subquery in Target List + +SELECT id, val, + (SELECT MAX(val) FROM rpr_target) as max_val, + COUNT(*) OVER w as cnt +FROM rpr_target +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + +-- Function Calls in Target List + +SELECT id, val, + COALESCE(val, 0) as coalesced, + ABS(val - 30) as distance, + COUNT(*) OVER w as cnt +FROM rpr_target +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + +-- Column Aliases and References + +SELECT id as row_id, + val as value, + COUNT(*) OVER w as cnt +FROM rpr_target +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY row_id; + +DROP TABLE rpr_target; + +-- ============================================================ +-- Set Operations Tests +-- 
Files: planner.c +-- ============================================================ +-- Tests RPR with UNION, INTERSECT, EXCEPT + +CREATE TABLE rpr_set1 (id INT, val INT); +CREATE TABLE rpr_set2 (id INT, val INT); + +INSERT INTO rpr_set1 VALUES (1, 10), (2, 20), (3, 30); +INSERT INTO rpr_set2 VALUES (2, 20), (3, 30), (4, 40); + +-- UNION with RPR + +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +UNION +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set2 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +ORDER BY id; + +-- UNION ALL with RPR + +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +UNION ALL +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set2 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +ORDER BY id, val; + +-- INTERSECT with RPR + +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +INTERSECT +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set2 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +ORDER BY id; + +-- EXCEPT with RPR + +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +EXCEPT +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set2 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +ORDER BY id; + +DROP TABLE rpr_set1, rpr_set2; 
+ +-- ============================================================ +-- Sorting and Grouping Tests +-- Files: planner.c, createplan.c +-- ============================================================ +-- Tests RPR interaction with sorting and grouping + +CREATE TABLE rpr_sort (id INT, category VARCHAR(10), val INT); +INSERT INTO rpr_sort VALUES + (1, 'A', 30), (2, 'B', 20), (3, 'A', 10), + (4, 'B', 40), (5, 'A', 50), (6, 'B', 60); + +-- RPR with GROUP BY + +SELECT category, + COUNT(*) as group_cnt, + MAX(val) as max_val, + COUNT(*) OVER w as window_cnt +FROM rpr_sort +GROUP BY category +WINDOW w AS ( + ORDER BY category + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS COUNT(*) > 0 +) +ORDER BY category; + +-- RPR with HAVING + +SELECT category, + COUNT(*) as group_cnt, + COUNT(*) OVER w as window_cnt +FROM rpr_sort +GROUP BY category +HAVING COUNT(*) > 2 +WINDOW w AS ( + ORDER BY category + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS COUNT(*) > 0 +) +ORDER BY category; + +-- RPR with DISTINCT + +SELECT DISTINCT category, + COUNT(*) OVER w as cnt +FROM rpr_sort +WINDOW w AS ( + PARTITION BY category + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY category; + +-- RPR with ORDER BY (different from window ORDER BY) + +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_sort +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY val DESC; + +-- RPR with LIMIT and OFFSET + +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_sort +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id +LIMIT 3 OFFSET 1; + +DROP TABLE rpr_sort; + +DROP TABLE rpr_planner; + +-- ============================================================ +-- Stress Tests +-- 
============================================================ +-- Edge cases and stress scenarios + +CREATE TABLE rpr_stress (id INT, val INT); +INSERT INTO rpr_stress SELECT i, i * 10 FROM generate_series(1, 20) i; + +-- Very Long Query with Many Windows +SELECT id, val, + COUNT(*) OVER w1 as cnt1, + COUNT(*) OVER w2 as cnt2, + COUNT(*) OVER w3 as cnt3 +FROM rpr_stress +WINDOW w1 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +), +w2 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (B+) + DEFINE B AS val > 50 +), +w3 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (C+) + DEFINE C AS val > 100 +) +ORDER BY id; + +-- Deeply Nested Subqueries with RPR + +SELECT * FROM ( + SELECT * FROM ( + SELECT * FROM ( + SELECT id, val, + COUNT(*) OVER w as cnt + FROM rpr_stress + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) + ) sub1 + ) sub2 +) sub3 +WHERE cnt > 10 +ORDER BY id; + +-- Complex Expression in DEFINE Clause + +SELECT id, val, + COUNT(*) OVER w as cnt +FROM rpr_stress +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ B) + DEFINE A AS (val % 3 = 0 OR val % 5 = 0), + B AS (val * 2 > 100 AND val / 2 < 100) +) +ORDER BY id; + +-- Window with No Matching Rows + +SELECT id, val, + COUNT(*) OVER w as cnt +FROM rpr_stress +WHERE val > 1000 -- No rows match +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + +-- Window on Single Row + +SELECT id, val, + COUNT(*) OVER w as cnt +FROM rpr_stress +WHERE id = 10 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + +DROP TABLE rpr_stress; + +-- ============================================================ +-- Error Limit Tests +-- 
============================================================ +-- Tests for error conditions in rpr.c + +CREATE TABLE rpr_errors (id INT, val INT); +INSERT INTO rpr_errors VALUES (1, 10), (2, 20); + +-- Test: PATTERN variable without DEFINE (A), DEFINE variable not in PATTERN (B) +SELECT id, val, COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A) + DEFINE + B AS TRUE +); +-- Expected: Success - A is implicitly TRUE, B is filtered out + +-- Test: 3 variables in PATTERN, 253 in DEFINE (DEFINE filtering test) +SELECT COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (V1 V2 V3) + DEFINE + V1 AS val > 0, V2 AS val > 0, V3 AS val > 0, V4 AS val > 0, V5 AS val > 0, V6 AS val > 0, V7 AS val > 0, V8 AS val > 0, V9 AS val > 0, V10 AS val > 0, + V11 AS val > 0, V12 AS val > 0, V13 AS val > 0, V14 AS val > 0, V15 AS val > 0, V16 AS val > 0, V17 AS val > 0, V18 AS val > 0, V19 AS val > 0, V20 AS val > 0, + V21 AS val > 0, V22 AS val > 0, V23 AS val > 0, V24 AS val > 0, V25 AS val > 0, V26 AS val > 0, V27 AS val > 0, V28 AS val > 0, V29 AS val > 0, V30 AS val > 0, + V31 AS val > 0, V32 AS val > 0, V33 AS val > 0, V34 AS val > 0, V35 AS val > 0, V36 AS val > 0, V37 AS val > 0, V38 AS val > 0, V39 AS val > 0, V40 AS val > 0, + V41 AS val > 0, V42 AS val > 0, V43 AS val > 0, V44 AS val > 0, V45 AS val > 0, V46 AS val > 0, V47 AS val > 0, V48 AS val > 0, V49 AS val > 0, V50 AS val > 0, + V51 AS val > 0, V52 AS val > 0, V53 AS val > 0, V54 AS val > 0, V55 AS val > 0, V56 AS val > 0, V57 AS val > 0, V58 AS val > 0, V59 AS val > 0, V60 AS val > 0, + V61 AS val > 0, V62 AS val > 0, V63 AS val > 0, V64 AS val > 0, V65 AS val > 0, V66 AS val > 0, V67 AS val > 0, V68 AS val > 0, V69 AS val > 0, V70 AS val > 0, + V71 AS val > 0, V72 AS val > 0, V73 AS val > 0, V74 AS val > 0, V75 AS val > 0, V76 AS val > 0, V77 AS val > 0, V78 AS val > 0, V79 AS 
val > 0, V80 AS val > 0, + V81 AS val > 0, V82 AS val > 0, V83 AS val > 0, V84 AS val > 0, V85 AS val > 0, V86 AS val > 0, V87 AS val > 0, V88 AS val > 0, V89 AS val > 0, V90 AS val > 0, + V91 AS val > 0, V92 AS val > 0, V93 AS val > 0, V94 AS val > 0, V95 AS val > 0, V96 AS val > 0, V97 AS val > 0, V98 AS val > 0, V99 AS val > 0, V100 AS val > 0, + V101 AS val > 0, V102 AS val > 0, V103 AS val > 0, V104 AS val > 0, V105 AS val > 0, V106 AS val > 0, V107 AS val > 0, V108 AS val > 0, V109 AS val > 0, V110 AS val > 0, + V111 AS val > 0, V112 AS val > 0, V113 AS val > 0, V114 AS val > 0, V115 AS val > 0, V116 AS val > 0, V117 AS val > 0, V118 AS val > 0, V119 AS val > 0, V120 AS val > 0, + V121 AS val > 0, V122 AS val > 0, V123 AS val > 0, V124 AS val > 0, V125 AS val > 0, V126 AS val > 0, V127 AS val > 0, V128 AS val > 0, V129 AS val > 0, V130 AS val > 0, + V131 AS val > 0, V132 AS val > 0, V133 AS val > 0, V134 AS val > 0, V135 AS val > 0, V136 AS val > 0, V137 AS val > 0, V138 AS val > 0, V139 AS val > 0, V140 AS val > 0, + V141 AS val > 0, V142 AS val > 0, V143 AS val > 0, V144 AS val > 0, V145 AS val > 0, V146 AS val > 0, V147 AS val > 0, V148 AS val > 0, V149 AS val > 0, V150 AS val > 0, + V151 AS val > 0, V152 AS val > 0, V153 AS val > 0, V154 AS val > 0, V155 AS val > 0, V156 AS val > 0, V157 AS val > 0, V158 AS val > 0, V159 AS val > 0, V160 AS val > 0, + V161 AS val > 0, V162 AS val > 0, V163 AS val > 0, V164 AS val > 0, V165 AS val > 0, V166 AS val > 0, V167 AS val > 0, V168 AS val > 0, V169 AS val > 0, V170 AS val > 0, + V171 AS val > 0, V172 AS val > 0, V173 AS val > 0, V174 AS val > 0, V175 AS val > 0, V176 AS val > 0, V177 AS val > 0, V178 AS val > 0, V179 AS val > 0, V180 AS val > 0, + V181 AS val > 0, V182 AS val > 0, V183 AS val > 0, V184 AS val > 0, V185 AS val > 0, V186 AS val > 0, V187 AS val > 0, V188 AS val > 0, V189 AS val > 0, V190 AS val > 0, + V191 AS val > 0, V192 AS val > 0, V193 AS val > 0, V194 AS val > 0, V195 AS val > 0, V196 AS val > 
0, V197 AS val > 0, V198 AS val > 0, V199 AS val > 0, V200 AS val > 0, + V201 AS val > 0, V202 AS val > 0, V203 AS val > 0, V204 AS val > 0, V205 AS val > 0, V206 AS val > 0, V207 AS val > 0, V208 AS val > 0, V209 AS val > 0, V210 AS val > 0, + V211 AS val > 0, V212 AS val > 0, V213 AS val > 0, V214 AS val > 0, V215 AS val > 0, V216 AS val > 0, V217 AS val > 0, V218 AS val > 0, V219 AS val > 0, V220 AS val > 0, + V221 AS val > 0, V222 AS val > 0, V223 AS val > 0, V224 AS val > 0, V225 AS val > 0, V226 AS val > 0, V227 AS val > 0, V228 AS val > 0, V229 AS val > 0, V230 AS val > 0, + V231 AS val > 0, V232 AS val > 0, V233 AS val > 0, V234 AS val > 0, V235 AS val > 0, V236 AS val > 0, V237 AS val > 0, V238 AS val > 0, V239 AS val > 0, V240 AS val > 0, + V241 AS val > 0, V242 AS val > 0, V243 AS val > 0, V244 AS val > 0, V245 AS val > 0, V246 AS val > 0, V247 AS val > 0, V248 AS val > 0, V249 AS val > 0, V250 AS val > 0, + V251 AS val > 0, V252 AS val > 0, V253 AS val > 0 +); +-- Expected: Success - unused DEFINE variables are filtered out + +-- Test: 251 variables in PATTERN, 252 in DEFINE (boundary - should succeed) +SELECT COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65 V66 V67 V68 V69 V70 V71 V72 V73 V74 V75 V76 V77 V78 V79 V80 V81 V82 V83 V84 V85 V86 V87 V88 V89 V90 V91 V92 V93 V94 V95 V96 V97 V98 V99 V100 V101 V102 V103 V104 V105 V106 V107 V108 V109 V110 V111 V112 V113 V114 V115 V116 V117 V118 V119 V120 V121 V122 V123 V124 V125 V126 V127 V128 V129 V130 V131 V132 V133 V134 V135 V136 V137 V138 V139 V140 V141 V142 V143 V144 V145 V146 V147 V148 V149 V150 V151 V152 V153 V154 V155 V156 V157 V158 V159 V160 V161 V162 V163 V164 V165 V166 V167 V168 V169 
V170 V171 V172 V173 V174 V175 V176 V177 V178 V179 V180 V181 V182 V183 V184 V185 V186 V187 V188 V189 V190 V191 V192 V193 V194 V195 V196 V197 V198 V199 V200 V201 V202 V203 V204 V205 V206 V207 V208 V209 V210 V211 V212 V213 V214 V215 V216 V217 V218 V219 V220 V221 V222 V223 V224 V225 V226 V227 V228 V229 V230 V231 V232 V233 V234 V235 V236 V237 V238 V239 V240 V241 V242 V243 V244 V245 V246 V247 V248 V249 V250 V251) + DEFINE + V1 AS val > 0, V2 AS val > 0, V3 AS val > 0, V4 AS val > 0, V5 AS val > 0, V6 AS val > 0, V7 AS val > 0, V8 AS val > 0, V9 AS val > 0, V10 AS val > 0, + V11 AS val > 0, V12 AS val > 0, V13 AS val > 0, V14 AS val > 0, V15 AS val > 0, V16 AS val > 0, V17 AS val > 0, V18 AS val > 0, V19 AS val > 0, V20 AS val > 0, + V21 AS val > 0, V22 AS val > 0, V23 AS val > 0, V24 AS val > 0, V25 AS val > 0, V26 AS val > 0, V27 AS val > 0, V28 AS val > 0, V29 AS val > 0, V30 AS val > 0, + V31 AS val > 0, V32 AS val > 0, V33 AS val > 0, V34 AS val > 0, V35 AS val > 0, V36 AS val > 0, V37 AS val > 0, V38 AS val > 0, V39 AS val > 0, V40 AS val > 0, + V41 AS val > 0, V42 AS val > 0, V43 AS val > 0, V44 AS val > 0, V45 AS val > 0, V46 AS val > 0, V47 AS val > 0, V48 AS val > 0, V49 AS val > 0, V50 AS val > 0, + V51 AS val > 0, V52 AS val > 0, V53 AS val > 0, V54 AS val > 0, V55 AS val > 0, V56 AS val > 0, V57 AS val > 0, V58 AS val > 0, V59 AS val > 0, V60 AS val > 0, + V61 AS val > 0, V62 AS val > 0, V63 AS val > 0, V64 AS val > 0, V65 AS val > 0, V66 AS val > 0, V67 AS val > 0, V68 AS val > 0, V69 AS val > 0, V70 AS val > 0, + V71 AS val > 0, V72 AS val > 0, V73 AS val > 0, V74 AS val > 0, V75 AS val > 0, V76 AS val > 0, V77 AS val > 0, V78 AS val > 0, V79 AS val > 0, V80 AS val > 0, + V81 AS val > 0, V82 AS val > 0, V83 AS val > 0, V84 AS val > 0, V85 AS val > 0, V86 AS val > 0, V87 AS val > 0, V88 AS val > 0, V89 AS val > 0, V90 AS val > 0, + V91 AS val > 0, V92 AS val > 0, V93 AS val > 0, V94 AS val > 0, V95 AS val > 0, V96 AS val > 0, V97 AS val > 0, V98 AS val > 0, 
V99 AS val > 0, V100 AS val > 0, + V101 AS val > 0, V102 AS val > 0, V103 AS val > 0, V104 AS val > 0, V105 AS val > 0, V106 AS val > 0, V107 AS val > 0, V108 AS val > 0, V109 AS val > 0, V110 AS val > 0, + V111 AS val > 0, V112 AS val > 0, V113 AS val > 0, V114 AS val > 0, V115 AS val > 0, V116 AS val > 0, V117 AS val > 0, V118 AS val > 0, V119 AS val > 0, V120 AS val > 0, + V121 AS val > 0, V122 AS val > 0, V123 AS val > 0, V124 AS val > 0, V125 AS val > 0, V126 AS val > 0, V127 AS val > 0, V128 AS val > 0, V129 AS val > 0, V130 AS val > 0, + V131 AS val > 0, V132 AS val > 0, V133 AS val > 0, V134 AS val > 0, V135 AS val > 0, V136 AS val > 0, V137 AS val > 0, V138 AS val > 0, V139 AS val > 0, V140 AS val > 0, + V141 AS val > 0, V142 AS val > 0, V143 AS val > 0, V144 AS val > 0, V145 AS val > 0, V146 AS val > 0, V147 AS val > 0, V148 AS val > 0, V149 AS val > 0, V150 AS val > 0, + V151 AS val > 0, V152 AS val > 0, V153 AS val > 0, V154 AS val > 0, V155 AS val > 0, V156 AS val > 0, V157 AS val > 0, V158 AS val > 0, V159 AS val > 0, V160 AS val > 0, + V161 AS val > 0, V162 AS val > 0, V163 AS val > 0, V164 AS val > 0, V165 AS val > 0, V166 AS val > 0, V167 AS val > 0, V168 AS val > 0, V169 AS val > 0, V170 AS val > 0, + V171 AS val > 0, V172 AS val > 0, V173 AS val > 0, V174 AS val > 0, V175 AS val > 0, V176 AS val > 0, V177 AS val > 0, V178 AS val > 0, V179 AS val > 0, V180 AS val > 0, + V181 AS val > 0, V182 AS val > 0, V183 AS val > 0, V184 AS val > 0, V185 AS val > 0, V186 AS val > 0, V187 AS val > 0, V188 AS val > 0, V189 AS val > 0, V190 AS val > 0, + V191 AS val > 0, V192 AS val > 0, V193 AS val > 0, V194 AS val > 0, V195 AS val > 0, V196 AS val > 0, V197 AS val > 0, V198 AS val > 0, V199 AS val > 0, V200 AS val > 0, + V201 AS val > 0, V202 AS val > 0, V203 AS val > 0, V204 AS val > 0, V205 AS val > 0, V206 AS val > 0, V207 AS val > 0, V208 AS val > 0, V209 AS val > 0, V210 AS val > 0, + V211 AS val > 0, V212 AS val > 0, V213 AS val > 0, V214 AS val > 0, V215 
AS val > 0, V216 AS val > 0, V217 AS val > 0, V218 AS val > 0, V219 AS val > 0, V220 AS val > 0, + V221 AS val > 0, V222 AS val > 0, V223 AS val > 0, V224 AS val > 0, V225 AS val > 0, V226 AS val > 0, V227 AS val > 0, V228 AS val > 0, V229 AS val > 0, V230 AS val > 0, + V231 AS val > 0, V232 AS val > 0, V233 AS val > 0, V234 AS val > 0, V235 AS val > 0, V236 AS val > 0, V237 AS val > 0, V238 AS val > 0, V239 AS val > 0, V240 AS val > 0, + V241 AS val > 0, V242 AS val > 0, V243 AS val > 0, V244 AS val > 0, V245 AS val > 0, V246 AS val > 0, V247 AS val > 0, V248 AS val > 0, V249 AS val > 0, V250 AS val > 0, + V251 AS val > 0, V252 AS val > 0 +); +-- Expected: Success - unused DEFINE variables are filtered out + +-- Test: 252 variables in PATTERN, 251 in DEFINE (exceeds limit with implicit TRUE) +SELECT COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65 V66 V67 V68 V69 V70 V71 V72 V73 V74 V75 V76 V77 V78 V79 V80 V81 V82 V83 V84 V85 V86 V87 V88 V89 V90 V91 V92 V93 V94 V95 V96 V97 V98 V99 V100 V101 V102 V103 V104 V105 V106 V107 V108 V109 V110 V111 V112 V113 V114 V115 V116 V117 V118 V119 V120 V121 V122 V123 V124 V125 V126 V127 V128 V129 V130 V131 V132 V133 V134 V135 V136 V137 V138 V139 V140 V141 V142 V143 V144 V145 V146 V147 V148 V149 V150 V151 V152 V153 V154 V155 V156 V157 V158 V159 V160 V161 V162 V163 V164 V165 V166 V167 V168 V169 V170 V171 V172 V173 V174 V175 V176 V177 V178 V179 V180 V181 V182 V183 V184 V185 V186 V187 V188 V189 V190 V191 V192 V193 V194 V195 V196 V197 V198 V199 V200 V201 V202 V203 V204 V205 V206 V207 V208 V209 V210 V211 V212 V213 V214 V215 V216 V217 V218 V219 V220 V221 V222 V223 V224 V225 V226 V227 V228 V229 V230 V231 V232 V233 V234 
V235 V236 V237 V238 V239 V240 V241 V242 V243 V244 V245 V246 V247 V248 V249 V250 V251 V252) + DEFINE + V1 AS val > 0, V2 AS val > 0, V3 AS val > 0, V4 AS val > 0, V5 AS val > 0, V6 AS val > 0, V7 AS val > 0, V8 AS val > 0, V9 AS val > 0, V10 AS val > 0, + V11 AS val > 0, V12 AS val > 0, V13 AS val > 0, V14 AS val > 0, V15 AS val > 0, V16 AS val > 0, V17 AS val > 0, V18 AS val > 0, V19 AS val > 0, V20 AS val > 0, + V21 AS val > 0, V22 AS val > 0, V23 AS val > 0, V24 AS val > 0, V25 AS val > 0, V26 AS val > 0, V27 AS val > 0, V28 AS val > 0, V29 AS val > 0, V30 AS val > 0, + V31 AS val > 0, V32 AS val > 0, V33 AS val > 0, V34 AS val > 0, V35 AS val > 0, V36 AS val > 0, V37 AS val > 0, V38 AS val > 0, V39 AS val > 0, V40 AS val > 0, + V41 AS val > 0, V42 AS val > 0, V43 AS val > 0, V44 AS val > 0, V45 AS val > 0, V46 AS val > 0, V47 AS val > 0, V48 AS val > 0, V49 AS val > 0, V50 AS val > 0, + V51 AS val > 0, V52 AS val > 0, V53 AS val > 0, V54 AS val > 0, V55 AS val > 0, V56 AS val > 0, V57 AS val > 0, V58 AS val > 0, V59 AS val > 0, V60 AS val > 0, + V61 AS val > 0, V62 AS val > 0, V63 AS val > 0, V64 AS val > 0, V65 AS val > 0, V66 AS val > 0, V67 AS val > 0, V68 AS val > 0, V69 AS val > 0, V70 AS val > 0, + V71 AS val > 0, V72 AS val > 0, V73 AS val > 0, V74 AS val > 0, V75 AS val > 0, V76 AS val > 0, V77 AS val > 0, V78 AS val > 0, V79 AS val > 0, V80 AS val > 0, + V81 AS val > 0, V82 AS val > 0, V83 AS val > 0, V84 AS val > 0, V85 AS val > 0, V86 AS val > 0, V87 AS val > 0, V88 AS val > 0, V89 AS val > 0, V90 AS val > 0, + V91 AS val > 0, V92 AS val > 0, V93 AS val > 0, V94 AS val > 0, V95 AS val > 0, V96 AS val > 0, V97 AS val > 0, V98 AS val > 0, V99 AS val > 0, V100 AS val > 0, + V101 AS val > 0, V102 AS val > 0, V103 AS val > 0, V104 AS val > 0, V105 AS val > 0, V106 AS val > 0, V107 AS val > 0, V108 AS val > 0, V109 AS val > 0, V110 AS val > 0, + V111 AS val > 0, V112 AS val > 0, V113 AS val > 0, V114 AS val > 0, V115 AS val > 0, V116 AS val > 0, V117 AS val 
> 0, V118 AS val > 0, V119 AS val > 0, V120 AS val > 0, + V121 AS val > 0, V122 AS val > 0, V123 AS val > 0, V124 AS val > 0, V125 AS val > 0, V126 AS val > 0, V127 AS val > 0, V128 AS val > 0, V129 AS val > 0, V130 AS val > 0, + V131 AS val > 0, V132 AS val > 0, V133 AS val > 0, V134 AS val > 0, V135 AS val > 0, V136 AS val > 0, V137 AS val > 0, V138 AS val > 0, V139 AS val > 0, V140 AS val > 0, + V141 AS val > 0, V142 AS val > 0, V143 AS val > 0, V144 AS val > 0, V145 AS val > 0, V146 AS val > 0, V147 AS val > 0, V148 AS val > 0, V149 AS val > 0, V150 AS val > 0, + V151 AS val > 0, V152 AS val > 0, V153 AS val > 0, V154 AS val > 0, V155 AS val > 0, V156 AS val > 0, V157 AS val > 0, V158 AS val > 0, V159 AS val > 0, V160 AS val > 0, + V161 AS val > 0, V162 AS val > 0, V163 AS val > 0, V164 AS val > 0, V165 AS val > 0, V166 AS val > 0, V167 AS val > 0, V168 AS val > 0, V169 AS val > 0, V170 AS val > 0, + V171 AS val > 0, V172 AS val > 0, V173 AS val > 0, V174 AS val > 0, V175 AS val > 0, V176 AS val > 0, V177 AS val > 0, V178 AS val > 0, V179 AS val > 0, V180 AS val > 0, + V181 AS val > 0, V182 AS val > 0, V183 AS val > 0, V184 AS val > 0, V185 AS val > 0, V186 AS val > 0, V187 AS val > 0, V188 AS val > 0, V189 AS val > 0, V190 AS val > 0, + V191 AS val > 0, V192 AS val > 0, V193 AS val > 0, V194 AS val > 0, V195 AS val > 0, V196 AS val > 0, V197 AS val > 0, V198 AS val > 0, V199 AS val > 0, V200 AS val > 0, + V201 AS val > 0, V202 AS val > 0, V203 AS val > 0, V204 AS val > 0, V205 AS val > 0, V206 AS val > 0, V207 AS val > 0, V208 AS val > 0, V209 AS val > 0, V210 AS val > 0, + V211 AS val > 0, V212 AS val > 0, V213 AS val > 0, V214 AS val > 0, V215 AS val > 0, V216 AS val > 0, V217 AS val > 0, V218 AS val > 0, V219 AS val > 0, V220 AS val > 0, + V221 AS val > 0, V222 AS val > 0, V223 AS val > 0, V224 AS val > 0, V225 AS val > 0, V226 AS val > 0, V227 AS val > 0, V228 AS val > 0, V229 AS val > 0, V230 AS val > 0, + V231 AS val > 0, V232 AS val > 0, V233 AS val > 
0, V234 AS val > 0, V235 AS val > 0, V236 AS val > 0, V237 AS val > 0, V238 AS val > 0, V239 AS val > 0, V240 AS val > 0, + V241 AS val > 0, V242 AS val > 0, V243 AS val > 0, V244 AS val > 0, V245 AS val > 0, V246 AS val > 0, V247 AS val > 0, V248 AS val > 0, V249 AS val > 0, V250 AS val > 0, + V251 AS val > 0 +); +-- Expected: ERROR - too many pattern variables (Maximum is 251) + +-- Test: Pattern nesting at maximum depth (depth 253) +-- Note: 253 nested GROUP{3,7} quantifiers produce depth 253 after optimization +SELECT id, val, COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((A{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7})
{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}) + DEFINE A AS val > 0 +); +-- Expected: Should succeed + +-- Test: Pattern nesting depth exceeds maximum (depth 254) +-- Note: 254 nested GROUP{3,7} quantifiers produce depth 254 after optimization +SELECT id, val, COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((A{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3
,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}) + DEFINE A AS val > 0 +); +-- Expected: ERROR - pattern nesting too deep + +DROP TABLE rpr_errors; + +-- ============================================================ +-- Jacob's Patterns +-- ============================================================ +-- Basic pattern matching tests from jacob branch + +-- Test: A? (optional, greedy) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A?) 
+ DEFINE A AS val > 50 +); + +-- Test: A{2} (exact count) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2}) + DEFINE A AS val <= 50 +); + +-- Test: A{1,3} (bounded range, greedy) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{1,3}) + DEFINE A AS val <= 50 +); + +-- Test: A | B (simple alternation) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A | B) + DEFINE A AS val <= 30, B AS val > 70 +); + +-- Test: A | B | C (three-way alternation) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A | B | C) + DEFINE A AS val <= 20, B AS val BETWEEN 40 AND 60, C AS val > 80 +); + +-- Test: A B C (concatenation) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS val <= 30, B AS val BETWEEN 31 AND 60, C AS val > 60 +); + +-- Test: A B? C (optional middle) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B? 
C) + DEFINE A AS val <= 30, B AS val BETWEEN 31 AND 60, C AS val > 60 +); + +-- Test: (A B)+ (grouped quantifier) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS val <= 50, B AS val > 50 +); + +-- Test: (A | B)+ C (alternation with quantifier) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS val <= 30, B AS val BETWEEN 31 AND 60, C AS val > 80 +); + +-- Test: (A+ | (A | B)+)* - nested alternation inside quantified group +-- Previously caused infinite recursion in nfa_advance_alt when the inner +-- BEGIN(+)'s skip jump was followed as an ALT branch pointer. +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM (VALUES + (1, ARRAY['A', 'B']), + (2, ARRAY['B']), + (3, ARRAY['C']) +) AS t(id, flags) +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A+ | (A | B)+)*) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- ============================================================ +-- Pathological Patterns +-- ============================================================ +-- These patterns previously caused issues. Now optimized or handled safely. 
+ +-- Test: (A*)* - nested unbounded (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A*)*) + DEFINE A AS TRUE +); + +-- Test: (A*)+ - inner nullable (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A*)+) + DEFINE A AS TRUE +); + +-- Test: (A+)* - outer nullable (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A+)*) + DEFINE A AS TRUE +); + +-- Test: (A+)+ - both require match (optimized to A+) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A+)+) + DEFINE A AS TRUE +); + +-- Test: (((A)*)*)* - triple nested (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 3) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((((A)*)*)*) + DEFINE A AS TRUE +); + +-- Optional group with alternation: A ((B | C) (D | E))* F? +-- When only A matches, the * group matches 0 times and F? matches 0 times +SELECT id, val, match_len +FROM (SELECT id, val, + COUNT(*) OVER w AS match_len + FROM (VALUES (1, 1), (2, 99)) AS t(id, val) + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A ((B | C) (D | E))* F?) 
+ DEFINE A AS val = 1, + B AS val = 2, C AS val = 3, + D AS val = 4, E AS val = 5, + F AS val = 6 + ) +) s; + +DROP TABLE rpr_plan; + +-- ============================================================ +-- End of rpr_base.sql +-- ============================================================ diff --git a/src/test/regress/sql/rpr_explain.sql b/src/test/regress/sql/rpr_explain.sql new file mode 100644 index 00000000000..f8c8f62e594 --- /dev/null +++ b/src/test/regress/sql/rpr_explain.sql @@ -0,0 +1,2254 @@ +-- ============================================================ +-- RPR EXPLAIN Tests +-- Tests for Row Pattern Recognition EXPLAIN output +-- ============================================================ +-- +-- This test suite validates EXPLAIN output for RPR queries, +-- including NFA statistics shown in EXPLAIN ANALYZE: +-- - NFA States: peak, total, merged +-- - NFA Contexts: peak, total, absorbed, skipped +-- - NFA: matched (len min/max/avg), mismatched (len min/max/avg) +-- - Pattern deparse formatting +-- - Multiple output formats (text, JSON, XML) +-- +-- Test Coverage: +-- Basic NFA Statistics Tests +-- State Statistics Tests +-- Context Statistics Tests +-- Match Length Statistics Tests +-- Mismatch Length Statistics Tests +-- JSON Format Tests +-- XML Format Tests +-- Multiple Partitions Tests +-- Edge Cases +-- Complex Pattern Tests +-- Real-world Pattern Examples +-- Performance-oriented Tests +-- INITIAL vs no INITIAL comparison +-- Quantifier Variations +-- Regression Tests for Statistics Accuracy +-- Alternation Pattern Tests +-- Group Pattern Tests +-- Window Function Combinations +-- DEFINE Expression Variations +-- Large Scale Statistics Verification +-- ============================================================ + +-- Filter function to normalize Storage memory values only (not NFA statistics). +-- NFA statistics should not change between platforms; if they do, it could +-- indicate issues such as uninitialized memory access. 
+-- Works for text, JSON, and XML formats. +create function rpr_explain_filter(text) returns setof text +language plpgsql as +$$ +declare + ln text; +begin + for ln in execute $1 + loop + -- Normalize memory size in Storage line only (platform-dependent) + -- Keep NFA statistics numbers unchanged (they are test assertions) + + -- Text format: "Storage: Memory Maximum Storage: 18kB" + if ln ~ 'Storage:.*Maximum Storage:' then + ln := regexp_replace(ln, '\m\d+kB', 'NkB', 'g'); + end if; + + -- JSON format: "Maximum Storage": 17 (number in kB units) + if ln ~ '"Maximum Storage":' then + ln := regexp_replace(ln, '"Maximum Storage": \d+', '"Maximum Storage": 0', 'g'); + end if; + + -- XML format: <Maximum-Storage>17</Maximum-Storage> (number in kB units) + if ln ~ '<Maximum-Storage>' then + ln := regexp_replace(ln, '<Maximum-Storage>\d+</Maximum-Storage>', '<Maximum-Storage>0</Maximum-Storage>', 'g'); + end if; + + return next ln; + end loop; +end; +$$; + +-- Setup: Create test tables +CREATE TEMP TABLE nfa_test ( + id serial, + v int, + cat char(1) +); + +-- Insert test data: 100 rows with predictable pattern +INSERT INTO nfa_test (v, cat) +SELECT i, + CASE + WHEN i % 5 = 1 THEN 'A' + WHEN i % 5 = 2 THEN 'B' + WHEN i % 5 = 3 THEN 'C' + WHEN i % 5 = 4 THEN 'D' + ELSE 'E' + END +FROM generate_series(1, 100) i; + +-- Additional test table with more complex patterns +CREATE TEMP TABLE nfa_complex ( + id serial, + price int, + trend char(1) -- U=up, D=down, S=stable +); + +INSERT INTO nfa_complex (price, trend) +VALUES + (100, 'S'), (105, 'U'), (110, 'U'), (108, 'D'), (112, 'U'), + (115, 'U'), (113, 'D'), (111, 'D'), (109, 'D'), (110, 'U'), + (120, 'U'), (125, 'U'), (130, 'U'), (128, 'D'), (126, 'D'), + (124, 'D'), (122, 'D'), (120, 'D'), (118, 'D'), (119, 'U'), + (121, 'U'), (123, 'U'), (125, 'U'), (127, 'U'), (129, 'U'), + (131, 'U'), (133, 'U'), (130, 'D'), (127, 'D'), (124, 'D'); + +-- ============================================================ +-- Basic NFA Statistics Tests +-- ============================================================ + +-- Simple pattern - should show basic
statistics +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS cat = 'A', B AS cat = 'B' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS cat = ''A'', B AS cat = ''B'' +)'); +DROP VIEW rpr_v; + +-- Pattern with no matches - 0 matched +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (X Y Z) + DEFINE X AS cat = 'X', Y AS cat = 'Y', Z AS cat = 'Z' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (X Y Z) + DEFINE X AS cat = ''X'', Y AS cat = ''Y'', Z AS cat = ''Z'' +);'); +DROP VIEW rpr_v; + +-- Pattern matching every row - high match count +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (R) + DEFINE R AS TRUE +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN 
(R) + DEFINE R AS TRUE +);'); +DROP VIEW rpr_v; + +-- Regression test: Space before parenthesis in pattern deparse +-- Verifies that "A (B | C)" correctly outputs as "a (b | c)" with space +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A (B | C)) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A (B | C)) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); +DROP VIEW rpr_v; + +-- Regression test: Sequential alternations at same depth +-- Verifies that "((B | C) (D | E))" correctly outputs as "(b | c) (d | e)" +-- Previously failed due to missing parentheses on ALT depth decrease +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) (D | E))*) + DEFINE A AS v % 5 = 1, B AS v % 5 = 2, C AS v % 5 = 3, D AS v % 5 = 4, E AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) (D | E))*) + DEFINE A AS v % 5 = 1, B AS v % 5 = 2, C AS v % 5 = 3, D AS v % 5 = 4, E AS v % 5 = 0 +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- State Statistics Tests (peak, total, merged) +-- 
============================================================ + +-- Simple quantifier pattern - A+ with short matches (no merging) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS v % 2 = 1 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS v % 2 = 1 +);'); +DROP VIEW rpr_v; + +-- Alternation pattern - multiple state branches +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C) (D | E)) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C) (D | E)) + DEFINE + A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', + D AS cat = ''D'', E AS cat = ''E'' +);'); +DROP VIEW rpr_v; + +-- Complex pattern with high state count +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B* C+) + DEFINE + A AS v % 3 = 1, + B AS v % 3 = 2, + C AS v % 3 = 0 +); +SELECT line FROM 
unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B* C+) + DEFINE + A AS v % 3 = 1, + B AS v % 3 = 2, + C AS v % 3 = 0 +);'); +DROP VIEW rpr_v; + +-- Grouped pattern with quantifier - state merging +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); +DROP VIEW rpr_v; + +-- State explosion pattern - many alternations +-- Pattern (A|B)(A|B)(A|B)(A|B) can create many parallel states +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + 
PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); +DROP VIEW rpr_v; + +-- Consecutive ALT merge followed by different ALT +-- Tests mergeConsecutiveAlts flush on ALT change: (A|B){2} (C|D) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); +DROP VIEW rpr_v; + +-- Consecutive ALT merge followed by non-ALT element +-- Tests mergeConsecutiveAlts flush on non-ALT: (A|B){2} c +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); +DROP VIEW rpr_v; + +-- ALT prefix/suffix absorbed into GROUP: (A|B) (A|B)+ (A|B) -> (A|B){3,} 
+CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B)+ (A | B)) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B)+ (A | B)) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +);'); +DROP VIEW rpr_v; + +-- High state merging - alternation with plus quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C)+ D) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3, D AS v % 4 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C)+ D) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3, D AS v % 4 = 0 +);'); +DROP VIEW rpr_v; + +-- Nested quantifiers causing state growth +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A | B)+)+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT 
rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A | B)+)+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2 +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- Context Statistics Tests (peak, total, absorbed, skipped) +-- ============================================================ + +-- Context absorption with unbounded quantifier at start +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); +DROP VIEW rpr_v; + +-- No absorption - bounded quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4} B) + DEFINE A AS v % 5 <> 0, B AS v 
% 5 = 0 +);'); +DROP VIEW rpr_v; + +-- Contexts skipped by SKIP PAST LAST ROW +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v % 10 = 1, B AS v % 10 = 2, C AS v % 10 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v % 10 = 1, B AS v % 10 = 2, C AS v % 10 = 3 +);'); +DROP VIEW rpr_v; + +-- High context absorption - unbounded group +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- Match Length Statistics Tests +-- ============================================================ + +-- Fixed length matches - all same length +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + 
DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', + D AS cat = ''D'', E AS cat = ''E'' +);'); +DROP VIEW rpr_v; + +-- Variable length matches - min/max/avg differ +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +);'); +DROP VIEW rpr_v; + +-- Very long matches +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 200) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v <= 195, B AS v > 195 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 200) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A 
AS v <= 195, B AS v > 195 +);'); +DROP VIEW rpr_v; + +-- Mix of short and long matches +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS (v % 20 <> 0) AND (v % 20 <= 10 OR v % 20 > 15), + B AS v % 20 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS (v % 20 <> 0) AND (v % 20 <= 10 OR v % 20 > 15), + B AS v % 20 = 0 +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- Mismatch Length Statistics Tests +-- ============================================================ + +-- Pattern that causes mismatches with length > 1 +-- Mismatch happens when partial match fails after processing multiple rows +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT v, + CASE WHEN v % 10 IN (1,2,3) THEN 'A' + WHEN v % 10 IN (4,5) THEN 'B' + WHEN v % 10 = 6 THEN 'C' + ELSE 'X' END AS cat + FROM generate_series(1, 100) AS s(v) +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+ C) + DEFINE A AS cat = 'A', B AS cat = 'B', C AS cat = 'C' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM ( + SELECT v, + CASE WHEN v % 10 IN (1,2,3) THEN ''A'' + WHEN v % 10 IN (4,5) THEN ''B'' + WHEN v % 10 = 6 THEN ''C'' + ELSE ''X'' END AS cat + FROM generate_series(1, 100) AS s(v) +) t 
+WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+ C) + DEFINE A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'' +);'); +DROP VIEW rpr_v; + +-- Long partial matches that fail +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT i AS v, + CASE + WHEN i <= 20 THEN 'A' + WHEN i <= 25 THEN 'B' + WHEN i = 26 THEN 'X' -- breaks the pattern + WHEN i <= 50 THEN 'A' + WHEN i <= 55 THEN 'B' + WHEN i = 56 THEN 'C' -- completes pattern + ELSE 'Y' + END AS cat + FROM generate_series(1, 60) i +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+ C) + DEFINE A AS cat = 'A', B AS cat = 'B', C AS cat = 'C' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM ( + SELECT i AS v, + CASE + WHEN i <= 20 THEN ''A'' + WHEN i <= 25 THEN ''B'' + WHEN i = 26 THEN ''X'' -- breaks the pattern + WHEN i <= 50 THEN ''A'' + WHEN i <= 55 THEN ''B'' + WHEN i = 56 THEN ''C'' -- completes pattern + ELSE ''Y'' + END AS cat + FROM generate_series(1, 60) i +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+ C) + DEFINE A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'' +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- JSON Format Tests +-- ============================================================ + +-- JSON format output with all statistics +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) 
AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2 +)'); +DROP VIEW rpr_v; + +-- JSON format with match length statistics +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +)'); +DROP VIEW rpr_v; + +-- ============================================================ +-- XML Format Tests +-- ============================================================ + +-- XML format output +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT XML) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 
+)'); +DROP VIEW rpr_v; + +-- JSON format with mismatch statistics +-- Pattern A B C expects 1,2,3 but gets 1,2,4 twice causing mismatches +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM (VALUES (1),(2),(4), (1),(2),(4), (1),(2),(3)) AS t(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v = 1, B AS v = 2, C AS v = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM (VALUES (1),(2),(4), (1),(2),(4), (1),(2),(3)) AS t(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v = 1, B AS v = 2, C AS v = 3 +)'); +DROP VIEW rpr_v; + +-- JSON format with skipped context statistics +-- Alternation pattern with SKIP PAST LAST ROW causes many contexts to be skipped +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +)'); +DROP VIEW rpr_v; + +-- ============================================================ +-- Multiple Partitions Tests +-- 
============================================================ + +-- Statistics across multiple partitions +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT p, v + FROM generate_series(1, 3) p, + generate_series(1, 30) v +) t +WINDOW w AS ( + PARTITION BY p + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM ( + SELECT p, v + FROM generate_series(1, 3) p, + generate_series(1, 30) v +) t +WINDOW w AS ( + PARTITION BY p + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); +DROP VIEW rpr_v; + +-- Different pattern behavior per partition +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT + CASE WHEN v <= 25 THEN 1 ELSE 2 END AS p, + v % 10 AS val + FROM generate_series(1, 50) v +) t +WINDOW w AS ( + PARTITION BY p + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS val < 5, B AS val >= 5 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM ( + SELECT + CASE WHEN v <= 25 THEN 1 ELSE 2 END AS p, + v % 10 AS val + FROM generate_series(1, 50) v +) t +WINDOW w AS ( + PARTITION BY p + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS val < 5, B AS val >= 5 +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- Edge Cases +-- 
============================================================ + +-- Empty result set +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 0) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v = 1, B AS v = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 0) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v = 1, B AS v = 2 +);'); +DROP VIEW rpr_v; + +-- Single row +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 1) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A) + DEFINE A AS TRUE +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 1) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A) + DEFINE A AS TRUE +);'); +DROP VIEW rpr_v; + +-- Pattern longer than data +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 5) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E F G H I J) + DEFINE + A AS v = 1, B AS v = 2, C AS v = 3, D AS v = 4, E AS v = 5, + F AS v = 6, G AS v = 7, H AS v = 8, I AS v = 9, J AS v = 10 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, 
SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 5) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E F G H I J) + DEFINE + A AS v = 1, B AS v = 2, C AS v = 3, D AS v = 4, E AS v = 5, + F AS v = 6, G AS v = 7, H AS v = 8, I AS v = 9, J AS v = 10 +);'); +DROP VIEW rpr_v; + +-- All rows match as single match +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS TRUE +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS TRUE +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- Complex Pattern Tests +-- ============================================================ + +-- Nested groups +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B) C)+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B) C)+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); +DROP VIEW rpr_v; + +-- Multiple alternations +CREATE 
TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (C | D | E)) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (C | D | E)) + DEFINE + A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', + D AS cat = ''D'', E AS cat = ''E'' +);'); +DROP VIEW rpr_v; + +-- Optional elements +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B? C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B? 
C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +);'); +DROP VIEW rpr_v; + +-- Bounded quantifiers +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,5} B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,5} B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +);'); +DROP VIEW rpr_v; + +-- Star quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B* C) + DEFINE A AS v % 10 = 1, B AS v % 10 IN (2,3,4,5,6,7,8), C AS v % 10 = 9 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B* C) + DEFINE A AS v % 10 = 1, B AS v % 10 IN (2,3,4,5,6,7,8), C AS v % 10 = 9 +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- Real-world Pattern Examples +-- ============================================================ + +-- Stock price pattern - V-shape (down then up) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_complex +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN 
(D+ U+) + DEFINE D AS trend = 'D', U AS trend = 'U' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_complex +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (D+ U+) + DEFINE D AS trend = ''D'', U AS trend = ''U'' +);'); +DROP VIEW rpr_v; + +-- Stock price pattern - peak (up, stable, down) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_complex +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (U+ S* D+) + DEFINE U AS trend = 'U', S AS trend = 'S', D AS trend = 'D' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_complex +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (U+ S* D+) + DEFINE U AS trend = ''U'', S AS trend = ''S'', D AS trend = ''D'' +);'); +DROP VIEW rpr_v; + +-- Consecutive increasing values (using PREV) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3,}) + DEFINE A AS v > PREV(v) OR PREV(v) IS NULL +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3,}) + DEFINE A AS v > PREV(v) OR PREV(v) IS NULL +);'); +DROP 
VIEW rpr_v; + +-- ============================================================ +-- Performance-oriented Tests +-- ============================================================ + +-- Large dataset with simple pattern +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); +DROP VIEW rpr_v; + +-- Large dataset with absorption +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 100 <> 0, B AS v % 100 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 100 <> 0, B AS v % 100 = 0 +);'); +DROP VIEW rpr_v; + +-- High state merge ratio +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM 
unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- INITIAL vs no INITIAL comparison +-- ============================================================ + +-- With INITIAL keyword +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); +DROP VIEW rpr_v; + +-- Without INITIAL keyword (same behavior currently) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + 
AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- Quantifier Variations +-- ============================================================ + +-- Plus quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS v % 4 <> 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS v % 4 <> 0 +);'); +DROP VIEW rpr_v; + +-- Star quantifier (zero or more) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A* B) + DEFINE A AS v % 4 IN (1, 2), B AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A* B) + DEFINE A AS v % 4 IN (1, 2), B AS v % 4 = 3 +);'); +DROP VIEW rpr_v; + +-- Question mark (zero or one) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A? 
B C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A? B C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +);'); +DROP VIEW rpr_v; + +-- Exact count {n} +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); +DROP VIEW rpr_v; + +-- Range {n,m} +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); +DROP VIEW rpr_v; + +-- At least {n,} +CREATE 
TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3,} B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3,} B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- Regression Tests for Statistics Accuracy +-- ============================================================ + +-- Verify state count accuracy +-- Pattern A+ B with 20 rows should show predictable state behavior +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); +DROP VIEW rpr_v; + +-- Verify context count with known absorption +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C) + DEFINE A AS v % 10 IN (1,2,3,4,5,6,7), B AS v % 10 = 8, C AS v % 10 = 
9 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C) + DEFINE A AS v % 10 IN (1,2,3,4,5,6,7), B AS v % 10 = 8, C AS v % 10 = 9 +);'); +DROP VIEW rpr_v; + +-- Verify match length with fixed-length pattern +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- Alternation Pattern Tests +-- ============================================================ + +-- Simple alternation +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) C) + DEFINE A AS cat = 'A', B AS cat = 'B', C AS cat = 'C' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED 
FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) C) + DEFINE A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'' +);'); +DROP VIEW rpr_v; + +-- Multiple items in alternation +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C | D) E) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C | D) E) + DEFINE + A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', + D AS cat = ''D'', E AS cat = ''E'' +);'); +DROP VIEW rpr_v; + +-- Alternation with quantifiers +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); +DROP VIEW rpr_v; + +-- Multiple alternatives (4+) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A | B | C | D | E) + DEFINE A AS v % 5 = 0, B AS v 
% 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A | B | C | D | E) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +);'); +DROP VIEW rpr_v; + +-- Alternation at start +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); +DROP VIEW rpr_v; + +-- Multiple sequential alternations +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C (D | E) F) + DEFINE A AS v % 6 = 0, B AS v % 6 = 1, C AS v % 6 = 2, D AS v % 6 = 3, E AS v % 6 = 4, F AS v % 6 = 5 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C (D | E) F) + DEFINE A AS v % 6 = 0, B AS v % 6 
= 1, C AS v % 6 = 2, D AS v % 6 = 3, E AS v % 6 = 4, F AS v % 6 = 5 +);'); +DROP VIEW rpr_v; + +-- Quantified alternatives +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+ | B+) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+ | B+) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); +DROP VIEW rpr_v; + +-- Alternation at end +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); +DROP VIEW rpr_v; + +-- Nested ALT at start of branch inside outer ALT +-- Pattern: (A ((B | C) D | E)) - preceding VAR + inner ALT as first branch element +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) D | E)) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +); +SELECT line FROM 
unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) D | E)) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +);'); +DROP VIEW rpr_v; + +-- Nested ALT at end of branch inside outer ALT +-- Pattern: (C (A | B) | D) - inner ALT is last element in outer branch +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (C (A | B) | D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (C (A | B) | D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- Group Pattern Tests +-- ============================================================ + +-- Simple group +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS 
BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); +DROP VIEW rpr_v; + +-- Group with bounded quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B){2,4}) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B){2,4}) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); +DROP VIEW rpr_v; + +-- Nested groups +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B){2})+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B){2})+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); +DROP VIEW rpr_v; + +-- Deep nesting (3+ levels) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((((A | B)+)+)+) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT 
rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((((A | B)+)+)+) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +);'); +DROP VIEW rpr_v; + +-- Bounded quantifier on alternation +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B){2,3} C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B){2,3} C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); +DROP VIEW rpr_v; + +-- Nested groups with quantifiers +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B)+ C)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B)+ C)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); +DROP VIEW rpr_v; + +-- Partial nested quantification +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A (B C)+)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v 
% 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A (B C)+)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- Window Function Combinations +-- ============================================================ + +-- count(*) with pattern +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); +DROP VIEW rpr_v; + +-- first_value with pattern +CREATE TEMP VIEW rpr_v AS +SELECT first_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT first_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN 
(A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); +DROP VIEW rpr_v; + +-- last_value with pattern +CREATE TEMP VIEW rpr_v AS +SELECT last_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT last_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); +DROP VIEW rpr_v; + +-- Multiple window functions +CREATE TEMP VIEW rpr_v AS +SELECT + count(*) OVER w, + first_value(v) OVER w, + last_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT + count(*) OVER w, + first_value(v) OVER w, + last_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- DEFINE Expression Variations +-- ============================================================ + +-- Complex boolean expressions +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER 
MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS (v % 5 <> 0) AND (v % 3 <> 0), + B AS (v % 5 = 0) OR (v % 3 = 0) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS (v % 5 <> 0) AND (v % 3 <> 0), + B AS (v % 5 = 0) OR (v % 3 = 0) +);'); +DROP VIEW rpr_v; + +-- Using PREV function +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (S U+ D+) + DEFINE + S AS TRUE, + U AS v > PREV(v), + D AS v < PREV(v) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (S U+ D+) + DEFINE + S AS TRUE, + U AS v > PREV(v), + D AS v < PREV(v) +);'); +DROP VIEW rpr_v; + +-- Using NULL comparisons +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT CASE WHEN v % 5 = 0 THEN NULL ELSE v END AS v + FROM generate_series(1, 30) v +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v IS NOT NULL, B AS v IS NULL +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM ( + SELECT CASE WHEN v % 5 = 0 THEN NULL 
ELSE v END AS v + FROM generate_series(1, 30) v +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v IS NOT NULL, B AS v IS NULL +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- Large Scale Statistics Verification +-- ============================================================ + +-- 500 rows - verify statistics scale correctly +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C) + DEFINE A AS v % 10 < 7, B AS v % 10 = 7, C AS v % 10 = 8 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C) + DEFINE A AS v % 10 < 7, B AS v % 10 = 7, C AS v % 10 = 8 +);'); +DROP VIEW rpr_v; + +-- High match count scenario +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); +DROP VIEW rpr_v; + +-- High skip count scenario +CREATE TEMP VIEW rpr_v AS +SELECT count(*) 
OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS v % 100 = 1, + B AS v % 100 = 2, + C AS v % 100 = 3, + D AS v % 100 = 4, + E AS v % 100 = 5 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS v % 100 = 1, + B AS v % 100 = 2, + C AS v % 100 = 3, + D AS v % 100 = 4, + E AS v % 100 = 5 +);'); +DROP VIEW rpr_v; + +-- Cleanup +DROP TABLE nfa_test; +DROP TABLE nfa_complex; diff --git a/src/test/regress/sql/rpr_nfa.sql b/src/test/regress/sql/rpr_nfa.sql new file mode 100644 index 00000000000..9573d1dab3b --- /dev/null +++ b/src/test/regress/sql/rpr_nfa.sql @@ -0,0 +1,1865 @@ +-- ============================================================ +-- RPR NFA Tests +-- Tests for Row Pattern Recognition NFA Runtime Execution +-- ============================================================ +-- +-- This test suite validates the NFA (Non-deterministic Finite +-- Automaton) runtime execution engine in nodeWindowAgg.c, +-- focusing on update_reduced_frame and related functions. +-- +-- Test Strategy: +-- Diagonal pattern style using ARRAY flags to explicitly +-- control which pattern variables match at each row. 
+-- +-- Test Coverage: +-- Basic NFA Flow (match->absorb->advance) +-- Absorption Optimization +-- Context Lifecycle Management +-- Advance Phase (Epsilon Transitions) +-- Match Phase (Variable Matching) +-- Frame Boundary Handling +-- State Management (Deduplication) +-- Statistics and Diagnostics +-- Quantifier Runtime Behavior +-- Pathological Pattern Protection +-- Alternation Runtime Behavior +-- Deep Nested Groups +-- SKIP Options (Runtime) +-- INITIAL Mode (Runtime) +-- Frame Boundary Variations +-- Special Partition Cases +-- DEFINE Special Cases +-- Absorption Dynamic Flags +-- FIXME Issues (Known Limitations) +-- +-- Responsibility: +-- - NFA runtime execution paths +-- - Context/State lifecycle management +-- - Runtime boundary conditions and protections +-- +-- NOT tested here (covered in other files): +-- - Pattern parsing/optimization (rpr_base.sql) +-- - EXPLAIN output (rpr_explain.sql) +-- - PREV/NEXT semantics (rpr.sql) +-- ============================================================ + +-- ============================================================ +-- Basic NFA Flow +-- ============================================================ + +-- Simple sequential pattern +WITH test_sequential AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['_']) -- No match + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_sequential +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- Quantified pattern (A+ B+ C+) +WITH test_quantified AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['C']), + (8, ARRAY['_']) + ) AS t(id, flags) 
+) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_quantified +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B+ C+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- Optional pattern (A B? C) +WITH test_optional AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), -- B skipped + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']), -- B matched + (6, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_optional +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B? C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- Alternation pattern (A (B|C) D) +WITH test_alternation AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), -- First branch + (3, ARRAY['D']), + (4, ARRAY['A']), + (5, ARRAY['C']), -- Second branch + (6, ARRAY['D']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alternation +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A (B | C) D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- ============================================================ +-- Absorption Optimization +-- ============================================================ + +-- Absorbable pattern (A+) +WITH test_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) 
OVER w AS match_end +FROM test_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + +-- Mixed absorbable/non-absorbable ((A+) | B) +WITH test_mixed_absorption AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_mixed_absorption +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+) | B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- State coverage (same elemIdx, different count) +WITH test_state_coverage AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_state_coverage +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{2,} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- ============================================================ +-- Context Lifecycle +-- ============================================================ + +-- Multiple overlapping contexts (SKIP TO NEXT ROW) +WITH test_overlapping_contexts AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_overlapping_contexts +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- 
Failed context cleanup (early failure) +WITH test_context_cleanup AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Pruned at first row + (2, ARRAY['A']), + (3, ARRAY['_']), -- Mismatched after row 2 + (4, ARRAY['A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_context_cleanup +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Partition end (incomplete contexts) +WITH test_partition_end AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']) + -- Pattern requires B, but partition ends + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_partition_end +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Completed context encountered during processing +-- Pattern (A | B C D): Ctx1 takes long B->C->D path, while Ctx2 takes +-- short A path and completes first. Next row sees Ctx2 +-- with states=NULL and skips it. 
+WITH test_completed_ctx AS ( + SELECT * FROM (VALUES + (1, ARRAY['B', '_']), + (2, ARRAY['C', 'A']), + (3, ARRAY['D', '_']), + (4, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_completed_ctx +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A | B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- ============================================================ +-- Advance Phase (Epsilon Transitions) +-- ============================================================ + +-- Nested groups ((A B)+) +WITH test_nested_groups AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_groups +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Multiple alternation branches (A (B|C|D) E) +WITH test_multi_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['E']), + (4, ARRAY['A']), + (5, ARRAY['C']), + (6, ARRAY['E']), + (7, ARRAY['A']), + (8, ARRAY['D']), + (9, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_multi_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A (B | C | D) E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + +-- Optional VAR at start (A? 
B C) +WITH test_optional_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), -- A skipped + (2, ARRAY['C']), + (3, ARRAY['A']), -- A matched + (4, ARRAY['B']), + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_optional_var +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A? B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- Nested alternation ((A|B) (C|D)) +WITH test_nested_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), -- A C + (3, ARRAY['A']), + (4, ARRAY['D']), -- A D + (5, ARRAY['B']), + (6, ARRAY['C']), -- B C + (7, ARRAY['B']), + (8, ARRAY['D']) -- B D + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B) (C | D)) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- ============================================================ +-- Match Phase +-- ============================================================ + +-- Simple VAR with END next (A B C all min=max=1) +WITH test_simple_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_simple_var +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- VAR max exceeded (A{2,3}) +WITH test_max_exceeded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, 
ARRAY['A']), + (3, ARRAY['A']), -- Max = 3 + (4, ARRAY['A']), -- Exceeds max, state removed + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_max_exceeded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{2,3} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Non-matching VAR (DEFINE false) +WITH test_non_matching AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['_']), -- B not matched (DEFINE false) + (3, ARRAY['A']), + (4, ARRAY['B']), -- B matched + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_non_matching +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- ============================================================ +-- Frame Boundary Handling +-- ============================================================ + +-- Limited frame (ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING) +WITH test_limited_frame AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), -- Within 3 FOLLOWING + (5, ARRAY['B']), -- Beyond 3 FOLLOWING from row 1 + (6, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_limited_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Unbounded frame (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +WITH test_unbounded_frame AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + 
(4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['B']) -- Far from start, but unbounded + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_unbounded_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Match exceeds frame boundary +WITH test_frame_exceeded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']) + -- Frame ends at row 3 (2 FOLLOWING), B never appears + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_frame_exceeded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Frame boundary forced mismatch +-- Limited frame with enough rows so that a context's frame boundary +-- is exceeded while still processing. 
+WITH test_frame_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_frame_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- ============================================================ +-- State Management +-- ============================================================ + +-- Duplicate state creation +-- '_' is a filler flag that satisfies none of the DEFINE conditions. +WITH test_duplicate_states AS ( + SELECT * FROM (VALUES + (1, ARRAY['A', 'B']), -- Both A and B match (creates duplicate states via different paths) + (2, ARRAY['C', '_']), + (3, ARRAY['D', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_duplicate_states +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B) C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- Large pattern (stress free list) +-- Ten variables A..J in sequence; rows 1-10 carry one matching flag each, in order. +WITH test_large_pattern AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']), + (7, ARRAY['G']), + (8, ARRAY['H']), + (9, ARRAY['I']), + (10, ARRAY['J']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_large_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D E F G H I J) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' =
ANY(flags), + G AS 'G' = ANY(flags), + H AS 'H' = ANY(flags), + I AS 'I' = ANY(flags), + J AS 'J' = ANY(flags) +); + +-- Reduced frame map reallocation (> 1024 rows) +-- 1100 generated rows alternate 'A' (odd id) and 'B' (even id); the outer query +-- returns only count/min/max over the per-row window results. +WITH test_map_realloc AS ( + SELECT id, CASE WHEN id % 2 = 1 THEN ARRAY['A'] ELSE ARRAY['B'] END AS flags + FROM generate_series(1, 1100) AS id +) +SELECT count(*), min(match_start), max(match_end) +FROM ( + SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end + FROM test_map_realloc + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) + ) +) sub; + +-- ============================================================ +-- Statistics and Diagnostics +-- ============================================================ + +-- Matched contexts +WITH test_matched AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_matched +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Pruned contexts (failed at first row) +WITH test_pruned AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Pruned + (2, ARRAY['_']), -- Pruned + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_pruned +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Mismatched contexts (failed after multiple rows) +WITH test_mismatched AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3,
ARRAY['_']), -- Mismatched after 2 rows + (4, ARRAY['A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_mismatched +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Absorbed contexts +WITH test_absorbed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorbed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + +-- Skipped contexts (SKIP TO NEXT ROW) +WITH test_skipped AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) -- Completes match starting at row 1 + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skipped +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- ============================================================ +-- Quantifier Runtime Behavior +-- ============================================================ + +-- Large count handling (A{100}) +-- 105 generated rows, every one flagged 'A'. +WITH test_large_count AS ( + SELECT i AS id, ARRAY['A'] AS flags + FROM generate_series(1, 105) i +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_large_count +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{100}) + DEFINE + A AS 'A' = ANY(flags) +);
+ +-- Unlimited quantifier (A{10,}) +-- Rows 1-15 are flagged 'A'; row 16 is the single 'B' row. +WITH test_unlimited AS ( + SELECT i AS id, ARRAY['A'] AS flags + FROM generate_series(1, 15) i + UNION ALL + SELECT 16, ARRAY['B'] +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_unlimited +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{10,} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Min boundary (A{3,5}) +-- Covers both ends of the range: rows 1-4 end at the minimum, rows 5-10 at the maximum. +WITH test_min_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), -- Min=3 reached, exit path available + (4, ARRAY['B']), -- Match ends at min + (5, ARRAY['A']), + (6, ARRAY['A']), + (7, ARRAY['A']), + (8, ARRAY['A']), + (9, ARRAY['A']), -- Count=5, max reached + (10, ARRAY['B']) -- Match ends at max + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_min_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{3,5} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Max boundary exceeded (A{3,5}) +WITH test_max_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['A']), -- Count=6 > max=5, row 1 context removed + (7, ARRAY['B']) -- Row 1 context: no match (exceeded max) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_max_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{3,5} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- ============================================================ +-- Pathological Pattern Runtime Protection +--
============================================================ + +-- Complex nested nullable ((A* B*)*) - Runtime protection +-- Row 5 carries 'C', which no DEFINE condition tests. +WITH test_complex_nested AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_complex_nested +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A* B*)*) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Nested nullable with quantifier ((A{0,3})*) +WITH test_nested_quantifier AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_quantifier +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A{0,3})*) + DEFINE + A AS 'A' = ANY(flags) +); + +-- ============================================================ +-- Alternation Runtime Behavior +-- ============================================================ + +-- Multi-branch alternation (A (B|C|D|E) F) +-- Four consecutive three-row groups, one per branch: A B F, A C F, A D F, A E F. +WITH test_multi_branch AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['F']), + (4, ARRAY['A']), + (5, ARRAY['C']), + (6, ARRAY['F']), + (7, ARRAY['A']), + (8, ARRAY['D']), + (9, ARRAY['F']), + (10, ARRAY['A']), + (11, ARRAY['E']), + (12, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_multi_branch +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A (B | C | D | E) F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' =
ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + +-- Alternation with quantifiers (A+ | B+ | C+) +-- Three runs of one flag each: rows 1-3 'A', rows 4-5 'B', rows 6-9 'C'. +WITH test_alt_quantifiers AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['C']), + (8, ARRAY['C']), + (9, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_quantifiers +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ | B+ | C+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- altPriority replacement (A B C | D) +-- D branch (higher altPriority) matches first at row 1, +-- then A B C branch (lower altPriority) replaces it at row 3. +-- NOTE(review): 'higher altPriority' presumably means the later, lower-precedence +-- branch (D); confirm against the executor. +WITH test_alt_replace AS ( + SELECT * FROM (VALUES + (1, ARRAY['A', 'D']), + (2, ARRAY['B', '_']), + (3, ARRAY['C', '_']), + (4, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_replace +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C | D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- ============================================================ +-- Deep Nested Groups +-- ============================================================ + +-- Three-level nesting ((((A B)+)+)+) +WITH test_deep_nesting AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_deep_nesting +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND
UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((((A B)+)+)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Multiple groups in nesting (((A B) (C D))+) +-- Two full A B C D cycles (rows 1-8) followed by a filler '_' row (row 9). +WITH test_nested_sequential AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['C']), + (8, ARRAY['D']), + (9, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_sequential +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A B) (C D))+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- Nested END→END max reached +-- Inner group (A B){2} reaches max=2 → exits to outer END +WITH test_end_nested_max AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_end_nested_max +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A B){2})+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Nested END→END between min/max +-- Inner group (A B){1,3} exits between min/max → outer END count++ +WITH test_end_nested_mid AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_end_nested_mid +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN
CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A B){1,3})+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- ============================================================ +-- SKIP Options (Runtime) +-- ============================================================ + +-- SKIP PAST LAST ROW (non-overlapping matches) +WITH test_skip_past AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_past +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + +-- SKIP TO NEXT ROW (overlapping matches) +-- Same rows and pattern as test_skip_past; only the AFTER MATCH SKIP clause differs. +WITH test_skip_next AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_next +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + +-- SKIP difference verification +-- Runs the same data and pattern under both SKIP modes, labels each row with its mode, +-- and sorts the combined result by (mode, id). +WITH test_skip_diff AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT 'SKIP PAST' AS mode, id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_diff +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +) +UNION ALL +SELECT 'SKIP NEXT' AS mode, id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_diff +WINDOW w AS ( + ORDER BY id
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +) +ORDER BY mode, id; + +-- ============================================================ +-- INITIAL Mode (Runtime) +-- ============================================================ + +-- INITIAL mode (not yet supported - produces syntax error) +-- NOTE(review): INITIAL is written before AFTER MATCH SKIP in this query. The basic +-- PREV test in rpr.out puts INITIAL directly before PATTERN and returns rows, so the +-- syntax error presumably comes from the clause order rather than from INITIAL +-- itself; confirm against the grammar. +WITH test_initial_mode AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Unmatched + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['_']), -- Unmatched + (5, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_initial_mode +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + +-- Default mode (include all rows) +WITH test_default_mode AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Unmatched, but included + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['_']), -- Unmatched, but included + (5, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_default_mode +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + +-- Mode difference verification (INITIAL not yet supported - produces syntax error) +-- NOTE(review): neither subquery references window w (there is no OVER w), so the +-- counts would not depend on the pattern even if the statement parsed; verify intent. +WITH test_mode_diff AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), + (2, ARRAY['A']), + (3, ARRAY['_']) + ) AS t(id, flags) +) +SELECT 'INITIAL' AS mode, COUNT(*) AS row_count +FROM ( + SELECT id FROM test_mode_diff + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A AS 'A' = ANY(flags) + ) +) sub +UNION ALL +SELECT 'DEFAULT' AS mode, COUNT(*) AS row_count +FROM ( + SELECT id
FROM test_mode_diff + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A AS 'A' = ANY(flags) + ) +) sub +ORDER BY mode; + +-- ============================================================ +-- Frame Boundary Variations +-- ============================================================ + +-- Very limited frame (1 FOLLOWING) +WITH test_one_following AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), -- Within 1 FOLLOWING from row 1 + (3, ARRAY['A']), -- Beyond 1 FOLLOWING from row 1 + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_one_following +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Medium frame (10 FOLLOWING) +WITH test_ten_following AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['A']), + (7, ARRAY['A']), + (8, ARRAY['A']), + (9, ARRAY['A']), + (10, ARRAY['A']), + (11, ARRAY['B']), -- Within 10 FOLLOWING from row 1 + (12, ARRAY['A']), + (13, ARRAY['B']) -- Beyond 10 FOLLOWING from row 1 + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_ten_following +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 10 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Exact boundary match +WITH test_exact_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['B']) -- Exactly at 4 FOLLOWING from row 1 (frame end) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM
test_exact_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- ============================================================ +-- Special Partition Cases +-- ============================================================ + +-- Empty partition (0 rows) +-- The WHERE clause (part = 99) removes every input row before the window is evaluated. +WITH test_empty_partition AS ( + SELECT * FROM (VALUES + (1, 1, ARRAY['A']), + (2, 2, ARRAY['_']) -- Different partition + ) AS t(id, part, flags) +) +SELECT id, part, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_empty_partition +WHERE part = 99 -- No rows match +WINDOW w AS ( + PARTITION BY part + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE + A AS 'A' = ANY(flags) +); + +-- Single row partition +WITH test_single_row AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_single_row +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE + A AS 'A' = ANY(flags) +); + +-- All rows fail matching (all DEFINE false) +-- A is defined as the constant false, so the row flags are irrelevant here. +WITH test_all_fail AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_all_fail +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS false -- All rows fail +); + +-- Partition end with absorbable pattern +-- SKIP PAST LAST ROW + unbounded frame + all rows match A +-- Triggers absorb in !rowExists path at partition boundary.
+WITH test_absorb_partition_end AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorb_partition_end +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + +-- ============================================================ +-- DEFINE Special Cases +-- ============================================================ + +-- Undefined variable in DEFINE +-- B is used in PATTERN (A B C) but has no entry in the DEFINE list. +WITH test_undefined_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['X']), -- B not defined, defaults to TRUE + (3, ARRAY['C']), + (4, ARRAY['A']), + (5, ARRAY['_']), -- B defaults to TRUE; '_' matches neither A nor C + (6, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_undefined_var +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + -- B is undefined, defaults to TRUE + C AS 'C' = ANY(flags) +); + +-- ============================================================ +-- Absorption Dynamic Flags +-- ============================================================ + +-- Partial absorbable pattern ((A+) B) +WITH test_partial_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_partial_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+) B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Dynamic flag update
((A+) | B) +WITH test_dynamic_flags AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_dynamic_flags +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+) | B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Non-absorbable context during absorption +-- Pattern (A B)+ C: A,B in absorbable group, C is not. +-- When END exits to C via nfa_state_create, isAbsorbable becomes false. +-- Data: two A B iterations (rows 1-4), then 'C' (row 5) and a filler '_' row. +WITH test_non_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']), + (6, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_non_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- Absorption flags early return (!hasAbsorbableState) +-- Pattern (A B)+ C D with SKIP PAST LAST ROW +-- After reaching C (non-absorbable), hasAbsorbableState becomes false. +-- On next row (D), the early return fires.
+WITH test_absorption_early_return AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']), + (6, ARRAY['D']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorption_early_return +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- Coverage failure: older can't cover newer's states +-- Pattern A+ | B+ with SKIP PAST LAST ROW. +-- Row 1: only A → Ctx1 takes A branch only (B fails). +-- Row 2: A and B → Ctx2 takes both branches. +-- Absorption: Ctx1 has A but no B → can't cover Ctx2's B state → fails. +-- ('_' is a filler flag that satisfies neither DEFINE condition.) +WITH test_coverage_fail AS ( + SELECT * FROM (VALUES + (1, ARRAY['A', '_']), + (2, ARRAY['A', 'B']), + (3, ARRAY['A', '_']), + (4, ARRAY['A', '_']), + (5, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_coverage_fail +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ | B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Absorb skips completed context (older->states==NULL) +-- Pattern A+ | B+ with SKIP PAST LAST ROW. +-- Row 1: A only → Ctx1 takes A branch. Row 2: B only → Ctx1 A fails (completed). +-- Ctx2 takes B branch. Absorption: Ctx1 states==NULL → skip.
+WITH test_older_completed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_older_completed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ | B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Absorb skips non-absorbable context (!hasAbsorbableState) +-- Pattern A+ | B C with SKIP PAST LAST ROW (only A+ branch absorbable). +-- Row 1: B only → Ctx1 takes B branch (non-absorbable), advances to C. +-- Row 2: C,A → Ctx1 C matches (hasAbsorbableState=false). Ctx2 takes A (absorbable). +-- Absorption: Ctx1 !hasAbsorbableState → skip. +WITH test_older_non_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['B', '_']), + (2, ARRAY['C', 'A']), + (3, ARRAY['_', 'A']), + (4, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_older_non_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ | B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- ============================================================ +-- FIXME Issues - Known Limitations +-- ============================================================ + +-- FIXME 1 - altPriority lexical order +-- Every row satisfies both A and B, so each iteration of (A | B)+ has two viable branches. +WITH test_alt_priority_repeated AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), -- Both A and B match + (2, ARRAY['A','B']), + (3, ARRAY['A','B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_priority_repeated +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B)+) +
DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- FIXME 1 - Nested ALT lexical order +-- Odd rows satisfy both A and B; even rows satisfy both C and D. +WITH test_alt_priority_nested AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['C','D']), + (3, ARRAY['A','B']), + (4, ARRAY['C','D']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_priority_nested +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A | B) (C | D))+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- FIXME 2 - Cycle prevention at count > 0 +WITH test_cycle_nonzero AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) -- Inner A* matches 0, cycles at count=3 + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_cycle_nonzero +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A*)*) + DEFINE + A AS 'A' = ANY(flags) +); + +-- FIXME 2 - Cycle with mixed nullables +-- Row 4 carries 'C', which no DEFINE condition tests. +WITH test_cycle_mixed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_cycle_mixed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A* B*)*) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); -- 2.43.0