From 3dba60529bdebbd2d118bd05347eb863109ffce9 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Sun, 15 Feb 2026 17:47:47 +0900 Subject: [PATCH v43 1/8] Row pattern recognition patch for raw parser. The series of patches are to implement the row pattern recognition (SQL/RPR) feature. Currently the implementation is a subset of SQL/RPR (ISO/IEC 19075-2:2016). Namely, implementation of some features of R020 (WINDOW clause). R010 (MATCH_RECOGNIZE) is out of the scope of the patches. Currently following features are implemented in the patches. - PATTERN - PATTERN regular expressions (+, *, ?) alternation (|), grouping () , {n}, {n,}, {n,m}, {,m} - DEFINE - INITIAL - AFTER MATCH SKIP TO PAST LAST ROW - AFTER MATCH SKIP TO NEXT ROW Currently following features are not implemented in the patches. - MEASURE - SUBSET - SEEK - AFTER MATCH SKIP TO - AFTER MATCH SKIP TO FIRST - AFTER MATCH SKIP TO LAST - PATTERN regular expression reluctant quantifiers (*? etc.), {- and -}, () (empty pattern) Anchores (^, $) are not permitted with RPR in Window clause by the standard. - PERMUTE - FIRST, LAST, CLASSIFIER Author: Tatsuo Ishii Author: Henson Choi Reviewed-by: Vik Fearing Reviewed-by: Jacob Champion Reviewed-by: Peter Eisentraut Reviewed-by: NINGWEI CHEN Reviewed-by: "David G. Johnston" Reviewed-by: Chao Li Reviewed-by: Henson Choi Reviewed-by: Tatsuo Ishii Discussion: https://postgr.es/m/20230625.210509.1276733411677577841.t-ishii%40sranhm.sra.co.jp ---- Major differences from v41 patches ---- - Add Peter as a reviewer to commit message. He gave a good valuable suggestions to the gram.y patch. I accidentally forgot to credit his name. - Tons of enhancements from Henson. -- Fully rewrite absorption logic (optimizer/plan/rpr.c). Example: (a+ b* c?){2,} -> (a+" b* c?){2,} -- Add safety check to tryMultiplyQuantifiers (optimizer/plan/rpr.c) Example: (A{2}){2,3} cannot be rewritten to A{4,6} because it allows 5 times reputation which is not correct. Perform multiplication optimization only in safe case. -- Fix infinite loop/segfault when unlimited quantifiers (e.g. (A*)*, (A+)+) are used. -- Optimize unlimited quantifiers if possible (e.g. (A*)* -> A*, (A+)+ -> A+) -- Fix segfault case (SELECT id, flag ...WINDOW w AS (... DEFINE T AS flag) -- More accurate error position report -- Some code refactoring in parser/planner patches -- Split regression test file into rpr.sql and rpr_base.sql -- Fix outfuncs.c and readfuncs.c -- Fix some corner cases bug of regular expression optimization -- Remove unused PATTERN variable to enhance performance -- Change treatment of undefined DEFINE variables. Previously if variable "A" is used in PATTERN but not defined in DEFINE, A is automatically defined as "A AS TRUE". Now A is evaluated as TRUE in executor and the generated entry for A is removed. This improves memory usage. -- EXPLAIN now shows the absorbable points (a+") - Bug fix from me. -- Assertion failure when two or more window clauses are used. --- src/backend/parser/gram.y | 425 ++++++++++++++++++++++++++++++-- src/backend/parser/scan.l | 4 +- src/include/nodes/parsenodes.h | 67 +++++ src/include/parser/kwlist.h | 5 + src/include/parser/parse_node.h | 1 + 5 files changed, 483 insertions(+), 19 deletions(-) diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index c567252acc4..e6ae23f368b 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -209,6 +209,8 @@ static void preprocess_pub_all_objtype_list(List *all_objects_list, static void preprocess_pubobj_list(List *pubobjspec_list, core_yyscan_t yyscanner); static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); +static RPRPatternNode *makeRPRQuantifier(int min, int max, ParseLoc reluctant, int location, + core_yyscan_t yyscanner); %} @@ -682,6 +684,15 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); json_object_constructor_null_clause_opt json_array_constructor_null_clause_opt +%type row_pattern_definition +%type opt_row_pattern_common_syntax + row_pattern row_pattern_alt row_pattern_seq + row_pattern_term row_pattern_primary + row_pattern_quantifier_opt +%type row_pattern_definition_list +%type opt_row_pattern_skip_to +%type opt_row_pattern_initial_or_seek + /* * Non-keyword token types. These are hard-wired into the "flex" lexer. * They must be listed first so that their numeric codes do not depend on @@ -724,7 +735,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE DATA_P DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS - DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS DEPENDS DEPTH DESC + DEFERRABLE DEFERRED DEFINE DEFINER DELETE_P DELIMITER DELIMITERS DEPENDS DEPTH DESC DETACH DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP @@ -740,7 +751,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); HANDLER HAVING HEADER_P HOLD HOUR_P IDENTITY_P IF_P IGNORE_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P INCLUDE - INCLUDING INCREMENT INDENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P + INCLUDING INCREMENT INDENT INDEX INDEXES INHERIT INHERITS INITIAL INITIALLY INLINE_P INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION @@ -765,8 +776,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); ORDER ORDINALITY OTHERS OUT_P OUTER_P OVER OVERLAPS OVERLAY OVERRIDING OWNED OWNER - PARALLEL PARAMETER PARSER PARTIAL PARTITION PARTITIONS PASSING PASSWORD PATH - PERIOD PLACING PLAN PLANS POLICY + PARALLEL PARAMETER PARSER PARTIAL PARTITION PARTITIONS PASSING PASSWORD PAST PATH + PATTERN_P PERIOD PLACING PLAN PLANS POLICY POSITION PRECEDING PRECISION PRESERVE PREPARE PREPARED PRIMARY PRIOR PRIVILEGES PROCEDURAL PROCEDURE PROCEDURES PROGRAM PUBLICATION @@ -777,7 +788,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); RESET RESPECT_P RESTART RESTRICT RETURN RETURNING RETURNS REVOKE RIGHT ROLE ROLLBACK ROLLUP ROUTINE ROUTINES ROW ROWS RULE - SAVEPOINT SCALAR SCHEMA SCHEMAS SCROLL SEARCH SECOND_P SECURITY SELECT + SAVEPOINT SCALAR SCHEMA SCHEMAS SCROLL SEARCH SECOND_P SECURITY SEEK SELECT SEQUENCE SEQUENCES SERIALIZABLE SERVER SESSION SESSION_USER SET SETS SETOF SHARE SHOW SIMILAR SIMPLE SKIP SMALLINT SNAPSHOT SOME SPLIT SOURCE SQL_P STABLE STANDALONE_P @@ -860,8 +871,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); * reference point for a precedence level that we can assign to other * keywords that lack a natural precedence level. * - * We need to do this for PARTITION, RANGE, ROWS, and GROUPS to support - * opt_existing_window_name (see comment there). + * We need to do this for PARTITION, RANGE, ROWS, GROUPS, AFTER, INITIAL, + * SEEK, PATTERN_P to support opt_existing_window_name (see comment there). * * The frame_bound productions UNBOUNDED PRECEDING and UNBOUNDED FOLLOWING * are even messier: since UNBOUNDED is an unreserved keyword (per spec!), @@ -891,7 +902,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %nonassoc UNBOUNDED NESTED /* ideally would have same precedence as IDENT */ %nonassoc IDENT PARTITION RANGE ROWS GROUPS PRECEDING FOLLOWING CUBE ROLLUP SET KEYS OBJECT_P SCALAR VALUE_P WITH WITHOUT PATH -%left Op OPERATOR /* multi-character ops and user-defined operators */ + AFTER INITIAL SEEK PATTERN_P +%left Op OPERATOR '|' /* multi-character ops and user-defined operators */ %left '+' '-' %left '*' '/' '%' %left '^' @@ -16636,6 +16648,8 @@ over_clause: OVER window_specification n->startOffset = NULL; n->endOffset = NULL; n->location = @2; + n->frameLocation = -1; + n->excludeLocation = -1; $$ = n; } | /*EMPTY*/ @@ -16643,7 +16657,8 @@ over_clause: OVER window_specification ; window_specification: '(' opt_existing_window_name opt_partition_clause - opt_sort_clause opt_frame_clause ')' + opt_sort_clause opt_frame_clause + opt_row_pattern_common_syntax ')' { WindowDef *n = makeNode(WindowDef); @@ -16655,20 +16670,23 @@ window_specification: '(' opt_existing_window_name opt_partition_clause n->frameOptions = $5->frameOptions; n->startOffset = $5->startOffset; n->endOffset = $5->endOffset; + n->frameLocation = $5->frameLocation; + n->excludeLocation = $5->excludeLocation; + n->rpCommonSyntax = (RPCommonSyntax *)$6; n->location = @1; $$ = n; } ; /* - * If we see PARTITION, RANGE, ROWS or GROUPS as the first token after the '(' - * of a window_specification, we want the assumption to be that there is - * no existing_window_name; but those keywords are unreserved and so could - * be ColIds. We fix this by making them have the same precedence as IDENT - * and giving the empty production here a slightly higher precedence, so - * that the shift/reduce conflict is resolved in favor of reducing the rule. - * These keywords are thus precluded from being an existing_window_name but - * are not reserved for any other purpose. + * If we see PARTITION, RANGE, ROWS, GROUPS, AFTER, INITIAL, SEEK or PATTERN_P + * as the first token after the '(' of a window_specification, we want the + * assumption to be that there is no existing_window_name; but those keywords + * are unreserved and so could be ColIds. We fix this by making them have the + * same precedence as IDENT and giving the empty production here a slightly + * higher precedence, so that the shift/reduce conflict is resolved in favor + * of reducing the rule. These keywords are thus precluded from being an + * existing_window_name but are not reserved for any other purpose. */ opt_existing_window_name: ColId { $$ = $1; } | /*EMPTY*/ %prec Op { $$ = NULL; } @@ -16689,6 +16707,8 @@ opt_frame_clause: n->frameOptions |= FRAMEOPTION_NONDEFAULT | FRAMEOPTION_RANGE; n->frameOptions |= $3; + n->frameLocation = @1; + n->excludeLocation = ($3 != 0) ? @3 : -1; $$ = n; } | ROWS frame_extent opt_window_exclusion_clause @@ -16697,6 +16717,8 @@ opt_frame_clause: n->frameOptions |= FRAMEOPTION_NONDEFAULT | FRAMEOPTION_ROWS; n->frameOptions |= $3; + n->frameLocation = @1; + n->excludeLocation = ($3 != 0) ? @3 : -1; $$ = n; } | GROUPS frame_extent opt_window_exclusion_clause @@ -16705,6 +16727,8 @@ opt_frame_clause: n->frameOptions |= FRAMEOPTION_NONDEFAULT | FRAMEOPTION_GROUPS; n->frameOptions |= $3; + n->frameLocation = @1; + n->excludeLocation = ($3 != 0) ? @3 : -1; $$ = n; } | /*EMPTY*/ @@ -16714,6 +16738,8 @@ opt_frame_clause: n->frameOptions = FRAMEOPTION_DEFAULTS; n->startOffset = NULL; n->endOffset = NULL; + n->frameLocation = -1; + n->excludeLocation = -1; $$ = n; } ; @@ -16789,6 +16815,8 @@ frame_bound: n->frameOptions = FRAMEOPTION_START_UNBOUNDED_PRECEDING; n->startOffset = NULL; n->endOffset = NULL; + n->frameLocation = -1; + n->excludeLocation = -1; $$ = n; } | UNBOUNDED FOLLOWING @@ -16798,6 +16826,8 @@ frame_bound: n->frameOptions = FRAMEOPTION_START_UNBOUNDED_FOLLOWING; n->startOffset = NULL; n->endOffset = NULL; + n->frameLocation = -1; + n->excludeLocation = -1; $$ = n; } | CURRENT_P ROW @@ -16807,6 +16837,8 @@ frame_bound: n->frameOptions = FRAMEOPTION_START_CURRENT_ROW; n->startOffset = NULL; n->endOffset = NULL; + n->frameLocation = -1; + n->excludeLocation = -1; $$ = n; } | a_expr PRECEDING @@ -16816,6 +16848,8 @@ frame_bound: n->frameOptions = FRAMEOPTION_START_OFFSET_PRECEDING; n->startOffset = $1; n->endOffset = NULL; + n->frameLocation = -1; + n->excludeLocation = -1; $$ = n; } | a_expr FOLLOWING @@ -16825,6 +16859,8 @@ frame_bound: n->frameOptions = FRAMEOPTION_START_OFFSET_FOLLOWING; n->startOffset = $1; n->endOffset = NULL; + n->frameLocation = -1; + n->excludeLocation = -1; $$ = n; } ; @@ -16837,6 +16873,325 @@ opt_window_exclusion_clause: | /*EMPTY*/ { $$ = 0; } ; +opt_row_pattern_common_syntax: +opt_row_pattern_skip_to opt_row_pattern_initial_or_seek + PATTERN_P '(' row_pattern ')' + DEFINE row_pattern_definition_list + { + RPCommonSyntax *n = makeNode(RPCommonSyntax); + n->rpSkipTo = $1; + n->initial = $2; + n->rpPattern = (RPRPatternNode *) $5; + n->rpDefs = $8; + $$ = (Node *) n; + } + | /*EMPTY*/ { $$ = NULL; } + ; + +opt_row_pattern_skip_to: + AFTER MATCH SKIP TO NEXT ROW + { + $$ = ST_NEXT_ROW; + } + | AFTER MATCH SKIP PAST LAST_P ROW + { + $$ = ST_PAST_LAST_ROW; + } + | /*EMPTY*/ + { + $$ = ST_PAST_LAST_ROW; + } + ; + +opt_row_pattern_initial_or_seek: + INITIAL { $$ = true; } + | SEEK + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("SEEK is not supported"), + errhint("Use INITIAL instead."), + parser_errposition(@1))); + } + | /*EMPTY*/ { $$ = true; } + ; + +row_pattern: + row_pattern_alt { $$ = $1; } + ; + +row_pattern_alt: + row_pattern_seq { $$ = $1; } + | row_pattern_alt '|' row_pattern_seq + { + RPRPatternNode *n; + /* If left side is already ALT, append to it */ + if (IsA($1, RPRPatternNode) && + ((RPRPatternNode *) $1)->nodeType == RPR_PATTERN_ALT) + { + n = (RPRPatternNode *) $1; + n->children = lappend(n->children, $3); + $$ = (Node *) n; + } + else + { + n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_ALT; + n->children = list_make2($1, $3); + n->min = 1; + n->max = 1; + n->reluctant = -1; + n->location = @1; + $$ = (Node *) n; + } + } + ; + +row_pattern_seq: + row_pattern_term { $$ = $1; } + | row_pattern_seq row_pattern_term + { + RPRPatternNode *n; + /* If left side is already SEQ, append to it */ + if (IsA($1, RPRPatternNode) && + ((RPRPatternNode *) $1)->nodeType == RPR_PATTERN_SEQ) + { + n = (RPRPatternNode *) $1; + n->children = lappend(n->children, $2); + $$ = (Node *) n; + } + else + { + n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_SEQ; + n->children = list_make2($1, $2); + n->min = 1; + n->max = 1; + n->reluctant = -1; + n->location = @1; + $$ = (Node *) n; + } + } + ; + +row_pattern_term: + row_pattern_primary row_pattern_quantifier_opt + { + RPRPatternNode *n = (RPRPatternNode *) $1; + RPRPatternNode *q = (RPRPatternNode *) $2; + + n->min = q->min; + n->max = q->max; + n->reluctant = q->reluctant; + $$ = (Node *) n; + } + ; + +row_pattern_primary: + ColId + { + RPRPatternNode *n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_VAR; + n->varName = $1; + n->min = 1; + n->max = 1; + n->reluctant = -1; + n->children = NIL; + n->location = @1; + $$ = (Node *) n; + } + | '(' row_pattern ')' + { + RPRPatternNode *inner = (RPRPatternNode *) $2; + RPRPatternNode *n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_GROUP; + n->children = list_make1(inner); + n->min = 1; + n->max = 1; + n->reluctant = -1; + n->location = @1; + $$ = (Node *) n; + } + ; + +row_pattern_quantifier_opt: + /* EMPTY */ { $$ = (Node *) makeRPRQuantifier(1, 1, -1, @$, yyscanner); } + | '*' { $$ = (Node *) makeRPRQuantifier(0, INT_MAX, -1, @1, yyscanner); } + | '+' { $$ = (Node *) makeRPRQuantifier(1, INT_MAX, -1, @1, yyscanner); } + | Op + { + /* Handle single Op: ? or reluctant quantifiers *?, +?, ?? */ + if (strcmp($1, "?") == 0) + $$ = (Node *) makeRPRQuantifier(0, 1, -1, @1, yyscanner); + else if (strcmp($1, "*?") == 0) + $$ = (Node *) makeRPRQuantifier(0, INT_MAX, @1 + 1, @1, yyscanner); + else if (strcmp($1, "+?") == 0) + $$ = (Node *) makeRPRQuantifier(1, INT_MAX, @1 + 1, @1, yyscanner); + else if (strcmp($1, "??") == 0) + $$ = (Node *) makeRPRQuantifier(0, 1, @1 + 1, @1, yyscanner); + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier \"%s\"", $1), + errhint("Valid quantifiers are: *, +, ?, *?, +?, ??, {n}, {n,}, {,m}, {n,m} and their reluctant versions."), + parser_errposition(@1))); + } + /* RELUCTANT quantifiers (when lexer separates tokens) */ + | '*' Op + { + if (strcmp($2, "?") != 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid token after \"*\" quantifier"), + errhint("Did you mean \"*?\" for reluctant quantifier?"), + parser_errposition(@2))); + $$ = (Node *) makeRPRQuantifier(0, INT_MAX, @2, @1, yyscanner); + } + | '+' Op + { + if (strcmp($2, "?") != 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid token after \"+\" quantifier"), + errhint("Did you mean \"+?\" for reluctant quantifier?"), + parser_errposition(@2))); + $$ = (Node *) makeRPRQuantifier(1, INT_MAX, @2, @1, yyscanner); + } + | Op Op + { + if (strcmp($1, "?") != 0 || strcmp($2, "?") != 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid quantifier combination"), + errhint("Did you mean \"??\" for reluctant quantifier?"), + parser_errposition(@1))); + $$ = (Node *) makeRPRQuantifier(0, 1, @2, @1, yyscanner); + } + /* {n}, {n,}, {,m}, {n,m} quantifiers */ + | '{' Iconst '}' + { + if ($2 <= 0 || $2 >= INT_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 1 and %d", INT_MAX - 1), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, $2, -1, @1, yyscanner); + } + | '{' Iconst ',' '}' + { + if ($2 < 0 || $2 >= INT_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 0 and %d", INT_MAX - 1), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, INT_MAX, -1, @1, yyscanner); + } + | '{' ',' Iconst '}' + { + if ($3 <= 0 || $3 >= INT_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 1 and %d", INT_MAX - 1), + parser_errposition(@3)); + $$ = (Node *) makeRPRQuantifier(0, $3, -1, @1, yyscanner); + } + | '{' Iconst ',' Iconst '}' + { + if ($2 < 0 || $4 <= 0 || $2 >= INT_MAX || $4 >= INT_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bounds must be between 0 and %d with max >= 1", INT_MAX - 1), + parser_errposition(@2)); + if ($2 > $4) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier minimum bound must not exceed maximum"), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, $4, -1, @1, yyscanner); + } + /* Reluctant versions: {n}?, {n,}?, {,m}?, {n,m}? */ + | '{' Iconst '}' Op + { + if (strcmp($4, "?") != 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid token after range quantifier"), + errhint("Only \"?\" is allowed after {n} to make it reluctant."), + parser_errposition(@4))); + if ($2 <= 0 || $2 >= INT_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 1 and %d", INT_MAX - 1), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, $2, @4, @1, yyscanner); + } + | '{' Iconst ',' '}' Op + { + if (strcmp($5, "?") != 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid token after range quantifier"), + errhint("Only \"?\" is allowed after {n,} or {,m} to make it reluctant."), + parser_errposition(@5))); + if ($2 < 0 || $2 >= INT_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 0 and %d", INT_MAX - 1), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, INT_MAX, @5, @1, yyscanner); + } + | '{' ',' Iconst '}' Op + { + if (strcmp($5, "?") != 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid token after range quantifier"), + errhint("Only \"?\" is allowed after {n,} or {,m} to make it reluctant."), + parser_errposition(@5))); + if ($3 <= 0 || $3 >= INT_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 1 and %d", INT_MAX - 1), + parser_errposition(@3)); + $$ = (Node *) makeRPRQuantifier(0, $3, @5, @1, yyscanner); + } + | '{' Iconst ',' Iconst '}' Op + { + if (strcmp($6, "?") != 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid token after range quantifier"), + errhint("Only \"?\" is allowed after {n,m} to make it reluctant."), + parser_errposition(@6))); + if ($2 < 0 || $4 <= 0 || $2 >= INT_MAX || $4 >= INT_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bounds must be between 0 and %d with max >= 1", INT_MAX - 1), + parser_errposition(@2)); + if ($2 > $4) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier minimum bound must not exceed maximum"), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, $4, @6, @1, yyscanner); + } + ; + +row_pattern_definition_list: + row_pattern_definition { $$ = list_make1($1); } + | row_pattern_definition_list ',' row_pattern_definition { $$ = lappend($1, $3); } + ; + +row_pattern_definition: + ColId AS a_expr + { + $$ = makeNode(ResTarget); + $$->name = $1; + $$->indirection = NIL; + $$->val = (Node *) $3; + $$->location = @1; + } + ; /* * Supporting nonterminals for expressions. @@ -16881,12 +17236,15 @@ MathOp: '+' { $$ = "+"; } | LESS_EQUALS { $$ = "<="; } | GREATER_EQUALS { $$ = ">="; } | NOT_EQUALS { $$ = "<>"; } + | '|' { $$ = "|"; } ; qual_Op: Op { $$ = list_make1(makeString($1)); } | OPERATOR '(' any_operator ')' { $$ = $3; } + | '|' + { $$ = list_make1(makeString("|")); } ; qual_all_Op: @@ -17976,6 +18334,7 @@ unreserved_keyword: | DECLARE | DEFAULTS | DEFERRED + | DEFINE | DEFINER | DELETE_P | DELIMITER @@ -18041,6 +18400,7 @@ unreserved_keyword: | INDEXES | INHERIT | INHERITS + | INITIAL | INLINE_P | INPUT_P | INSENSITIVE @@ -18116,7 +18476,9 @@ unreserved_keyword: | PARTITIONS | PASSING | PASSWORD + | PAST | PATH + | PATTERN_P | PERIOD | PLAN | PLANS @@ -18170,6 +18532,7 @@ unreserved_keyword: | SEARCH | SECOND_P | SECURITY + | SEEK | SEQUENCE | SEQUENCES | SERIALIZABLE @@ -18557,6 +18920,7 @@ bare_label_keyword: | DEFAULTS | DEFERRABLE | DEFERRED + | DEFINE | DEFINER | DELETE_P | DELIMITER @@ -18635,6 +18999,7 @@ bare_label_keyword: | INDEXES | INHERIT | INHERITS + | INITIAL | INITIALLY | INLINE_P | INNER_P @@ -18748,7 +19113,9 @@ bare_label_keyword: | PARTITIONS | PASSING | PASSWORD + | PAST | PATH + | PATTERN_P | PERIOD | PLACING | PLAN @@ -18807,6 +19174,7 @@ bare_label_keyword: | SCROLL | SEARCH | SECURITY + | SEEK | SELECT | SEQUENCE | SEQUENCES @@ -19998,6 +20366,29 @@ makeRecursiveViewSelect(char *relname, List *aliases, Node *query) return (Node *) s; } +/* + * makeRPRQuantifier + * Create an RPRPatternNode with specified quantifier bounds. + */ +static RPRPatternNode * +makeRPRQuantifier(int min, int max, ParseLoc reluctant, int location, + core_yyscan_t yyscanner) +{ + RPRPatternNode *n = makeNode(RPRPatternNode); + + if (reluctant >= 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("reluctant quantifiers are not yet supported"), + parser_errposition(reluctant))); + + n->min = min; + n->max = max; + n->reluctant = reluctant; + n->location = location; + return n; +} + /* parser_init() * Initialize to parse one query string */ diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 6c162f48342..fa04ec2b951 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -359,7 +359,7 @@ not_equals "!=" * If you change either set, adjust the character lists appearing in the * rule for "operator"! */ -self [,()\[\].;\:\+\-\*\/\%\^\<\>\=] +self [,()\[\].;\:\+\-\*\/\%\^\<\>\=\|] op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=] operator {op_chars}+ @@ -930,7 +930,7 @@ other . * that the "self" rule would have. */ if (nchars == 1 && - strchr(",()[].;:+-*/%^<>=", yytext[0])) + strchr(",()[].;:+-*/%^<>=|", yytext[0])) return yytext[0]; /* * Likewise, if what we have left is two chars, and diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 0aec49bdd22..3a472481fb2 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -578,6 +578,57 @@ typedef struct SortBy ParseLoc location; /* operator location, or -1 if none/unknown */ } SortBy; +/* + * AFTER MATCH row pattern skip to types in row pattern common syntax + */ +typedef enum RPSkipTo +{ + ST_NONE, /* AFTER MATCH omitted */ + ST_NEXT_ROW, /* SKIP TO NEXT ROW */ + ST_PAST_LAST_ROW, /* SKIP TO PAST LAST ROW */ +} RPSkipTo; + +/* + * RPRPatternNodeType - Row Pattern Recognition pattern node types + */ +typedef enum RPRPatternNodeType +{ + RPR_PATTERN_VAR, /* variable reference */ + RPR_PATTERN_SEQ, /* sequence (concatenation) */ + RPR_PATTERN_ALT, /* alternation (|) */ + RPR_PATTERN_GROUP /* group (parentheses) */ +} RPRPatternNodeType; + +/* + * RPRPatternNode - Row Pattern Recognition pattern AST node + */ +typedef struct RPRPatternNode +{ + NodeTag type; /* T_RPRPatternNode */ + RPRPatternNodeType nodeType; /* VAR, SEQ, ALT, GROUP */ + int min; /* minimum repetitions (0 for *, ?) */ + int max; /* maximum repetitions (INT_MAX for *, +) */ + ParseLoc reluctant; /* location of '?' for reluctant, -1 for + * greedy */ + ParseLoc location; /* token location, or -1 */ + char *varName; /* VAR: variable name */ + List *children; /* SEQ, ALT, GROUP: child nodes */ +} RPRPatternNode; + +/* + * RowPatternCommonSyntax - raw representation of row pattern common syntax + */ +typedef struct RPCommonSyntax +{ + NodeTag type; + RPSkipTo rpSkipTo; /* Row Pattern AFTER MATCH SKIP type */ + bool initial; /* true if is + * initial */ + RPRPatternNode *rpPattern; /* PATTERN clause AST */ + List *rpDefs; /* row pattern definitions clause (list of + * ResTarget) */ +} RPCommonSyntax; + /* * WindowDef - raw representation of WINDOW and OVER clauses * @@ -593,10 +644,13 @@ typedef struct WindowDef char *refname; /* referenced window name, if any */ List *partitionClause; /* PARTITION BY expression list */ List *orderClause; /* ORDER BY (list of SortBy) */ + RPCommonSyntax *rpCommonSyntax; /* row pattern common syntax */ int frameOptions; /* frame_clause options, see below */ Node *startOffset; /* expression for starting bound, if any */ Node *endOffset; /* expression for ending bound, if any */ ParseLoc location; /* parse location, or -1 if none/unknown */ + ParseLoc frameLocation; /* ROWS/RANGE/GROUPS location, or -1 */ + ParseLoc excludeLocation; /* EXCLUDE location, or -1 */ } WindowDef; /* @@ -1589,6 +1643,11 @@ typedef struct GroupingSet * the orderClause might or might not be copied (see copiedOrder); the framing * options are never copied, per spec. * + * "defineClause" is Row Pattern Recognition DEFINE clause (list of + * TargetEntry). TargetEntry.resname represents row pattern definition + * variable name. "rpPattern" represents PATTERN clause as an AST tree + * (RPRPatternNode). + * * The information relevant for the query jumbling is the partition clause * type and its bounds. */ @@ -1618,6 +1677,14 @@ typedef struct WindowClause Index winref; /* ID referenced by window functions */ /* did we copy orderClause from refname? */ bool copiedOrder pg_node_attr(query_jumble_ignore); + /* Row Pattern AFTER MATCH SKIP clause */ + RPSkipTo rpSkipTo; /* Row Pattern Skip To type */ + bool initial; /* true if is + * initial */ + /* Row Pattern DEFINE clause (list of TargetEntry) */ + List *defineClause; + /* Row Pattern PATTERN clause AST */ + RPRPatternNode *rpPattern; } WindowClause; /* diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index f7753c5c8a8..1624f4412dc 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -129,6 +129,7 @@ PG_KEYWORD("default", DEFAULT, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("defaults", DEFAULTS, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("deferrable", DEFERRABLE, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("deferred", DEFERRED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("define", DEFINE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("definer", DEFINER, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("delete", DELETE_P, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("delimiter", DELIMITER, UNRESERVED_KEYWORD, BARE_LABEL) @@ -217,6 +218,7 @@ PG_KEYWORD("index", INDEX, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("indexes", INDEXES, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("inherit", INHERIT, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("inherits", INHERITS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("initial", INITIAL, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("initially", INITIALLY, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("inline", INLINE_P, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("inner", INNER_P, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) @@ -342,7 +344,9 @@ PG_KEYWORD("partition", PARTITION, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("partitions", PARTITIONS, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("passing", PASSING, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("password", PASSWORD, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("past", PAST, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("path", PATH, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("pattern", PATTERN_P, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("period", PERIOD, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("placing", PLACING, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("plan", PLAN, UNRESERVED_KEYWORD, BARE_LABEL) @@ -405,6 +409,7 @@ PG_KEYWORD("scroll", SCROLL, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("search", SEARCH, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("second", SECOND_P, UNRESERVED_KEYWORD, AS_LABEL) PG_KEYWORD("security", SECURITY, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("seek", SEEK, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("select", SELECT, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("sequence", SEQUENCE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("sequences", SEQUENCES, UNRESERVED_KEYWORD, BARE_LABEL) diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h index f23e21f318b..4dc7e5738ae 100644 --- a/src/include/parser/parse_node.h +++ b/src/include/parser/parse_node.h @@ -51,6 +51,7 @@ typedef enum ParseExprKind EXPR_KIND_WINDOW_FRAME_RANGE, /* window frame clause with RANGE */ EXPR_KIND_WINDOW_FRAME_ROWS, /* window frame clause with ROWS */ EXPR_KIND_WINDOW_FRAME_GROUPS, /* window frame clause with GROUPS */ + EXPR_KIND_RPR_DEFINE, /* DEFINE */ EXPR_KIND_SELECT_TARGET, /* SELECT target list item */ EXPR_KIND_INSERT_TARGET, /* INSERT target list item */ EXPR_KIND_UPDATE_SOURCE, /* UPDATE assignment source item */ -- 2.43.0