From 7a69cb818e4d1c37998266a8c1883d5454a8d9f5 Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Wed, 17 Jun 2026 14:55:51 +0900 Subject: [PATCH 08/13] Refactor transformDefineClause in row pattern recognition Two related cleanups to DEFINE clause parse analysis, with no change to planner or executor output beyond one error-cursor position: - Hoist the "DEFINE variable not used in PATTERN" cross-check out of the recursive validateRPRPatternVarCount() into its caller. The check only needs to run once, so the rpDefs argument and its NULL-sentinel gating are gone, and the recursive routine now only counts pattern variables. - Reorder per-variable DEFINE processing to transformExpr -> coerce_to_boolean -> pull_var_clause and drop the separate second coercion pass, so pull_var_clause always operates on the final coerced expression and a type mismatch is reported before the targetlist is touched. The duplicate-variable check moves to its own leading loop and now reports at the later (duplicate) definition. Add regression coverage for DEFINE coercion and Var propagation: a boolean-domain predicate (the one case where coerce_to_boolean is not a no-op), a Var referenced only inside a navigation operation, and rejection of a non-boolean DEFINE expression. --- src/backend/parser/parse_rpr.c | 163 +++++++++++-------------- src/test/regress/expected/rpr_base.out | 78 +++++++++++- src/test/regress/sql/rpr_base.sql | 59 +++++++++ 3 files changed, 209 insertions(+), 91 deletions(-) diff --git a/src/backend/parser/parse_rpr.c b/src/backend/parser/parse_rpr.c index 3e6b2e579a3..116cd206e39 100644 --- a/src/backend/parser/parse_rpr.c +++ b/src/backend/parser/parse_rpr.c @@ -53,7 +53,7 @@ typedef struct /* Forward declarations */ static void validateRPRPatternVarCount(ParseState *pstate, RPRPatternNode *node, - List *rpDefs, List **varNames); + List **varNames); static List *transformDefineClause(ParseState *pstate, WindowDef *windef, List **targetlist); static bool define_walker(Node *node, void *context); @@ -192,15 +192,14 @@ transformRPR(ParseState *pstate, WindowClause *wc, WindowDef *windef, * Throws an error if the number of unique variables would require a varId * greater than RPR_VARID_MAX. * - * If rpDefs is non-NULL, each DEFINE variable name is also validated against - * varNames; any DEFINE name not present in PATTERN is rejected with an error. - * varNames itself is not extended by this step -- it carries only PATTERN - * variable names, which is what transformColumnRef checks via - * p_rpr_pattern_vars to identify pattern variable qualifiers. + * varNames collects the unique PATTERN variable names, which is what + * transformColumnRef checks via p_rpr_pattern_vars to identify pattern + * variable qualifiers. Cross-checking DEFINE variable names against this + * list is the caller's responsibility, since it only needs to run once. */ static void validateRPRPatternVarCount(ParseState *pstate, RPRPatternNode *node, - List *rpDefs, List **varNames) + List **varNames) { /* Pattern node must exist - parser always provides non-NULL root */ Assert(node != NULL); @@ -255,39 +254,10 @@ validateRPRPatternVarCount(ParseState *pstate, RPRPatternNode *node, /* Recurse into children */ foreach_node(RPRPatternNode, child, node->children) { - validateRPRPatternVarCount(pstate, child, NULL, varNames); + validateRPRPatternVarCount(pstate, child, varNames); } break; } - - /* - * After the top-level call, validate that every DEFINE variable name is - * present in the PATTERN variable list; reject names not used in PATTERN. - * This is only done once at the outermost recursion level, detected by - * rpDefs being non-NULL (recursive calls pass NULL). - */ - if (rpDefs) - { - foreach_node(ResTarget, rt, rpDefs) - { - bool found = false; - - foreach_node(String, varname, *varNames) - { - if (strcmp(strVal(varname), rt->name) == 0) - { - found = true; - break; - } - } - if (!found) - ereport(ERROR, - errcode(ERRCODE_SYNTAX_ERROR), - errmsg("DEFINE variable \"%s\" is not used in PATTERN", - rt->name), - parser_errposition(pstate, rt->location)); - } - } } /* @@ -296,14 +266,16 @@ validateRPRPatternVarCount(ParseState *pstate, RPRPatternNode *node, * * First: * 1. Validates PATTERN variable count and collects RPR variable names + * 2. Rejects DEFINE variables not used in PATTERN + * 3. Checks for duplicate variable names in DEFINE clause * * Then for each DEFINE variable: - * 2. Checks for duplicate variable names in DEFINE clause - * 3. Transforms expression via transformExpr() and ensures referenced - * Var nodes are present in the query targetlist (via pull_var_clause) - * 4. Creates defineClause entry with proper resname (pattern variable name) - * 5. Coerces expressions to boolean type - * 6. Marks column origins and assigns collation information + * 4. Transforms expression via transformExpr() and coerces it to boolean + * 5. Creates defineClause entry with proper resname (pattern variable name) + * 6. Ensures referenced Var nodes are present in the query targetlist (via + * pull_var_clause) + * + * Finally marks column origins and assigns collation information. * * Note: Variables not in DEFINE are evaluated as TRUE by the executor. * Variables in DEFINE but not in PATTERN are rejected as an error. @@ -316,9 +288,7 @@ static List * transformDefineClause(ParseState *pstate, WindowDef *windef, List **targetlist) { - List *restargets; List *defineClause = NIL; - char *name; List *patternVarNames = NIL; /* @@ -328,56 +298,86 @@ transformDefineClause(ParseState *pstate, WindowDef *windef, Assert(windef->rpCommonSyntax->rpDefs != NULL); /* - * Validate PATTERN variable count, reject DEFINE variables not used in - * PATTERN, and collect PATTERN variable names for transformColumnRef. + * Validate PATTERN variable count and collect the PATTERN variable names + * for transformColumnRef. */ validateRPRPatternVarCount(pstate, windef->rpCommonSyntax->rpPattern, - windef->rpCommonSyntax->rpDefs, &patternVarNames); pstate->p_rpr_pattern_vars = patternVarNames; + /* + * Reject any DEFINE variable whose name does not appear in PATTERN. This + * cross-check only needs to run once, so it lives here in the caller + * rather than in the recursive validateRPRPatternVarCount(). + */ + foreach_node(ResTarget, rt, windef->rpCommonSyntax->rpDefs) + { + bool found = false; + + foreach_node(String, varname, patternVarNames) + { + if (strcmp(strVal(varname), rt->name) == 0) + { + found = true; + break; + } + } + if (!found) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("DEFINE variable \"%s\" is not used in PATTERN", + rt->name), + parser_errposition(pstate, rt->location)); + } + /* * Check for duplicate row pattern definition variables. The standard * requires that no two row pattern definition variable names shall be - * equivalent. + * equivalent. Report the error at the later (duplicate) definition. */ - restargets = NIL; foreach_node(ResTarget, restarget, windef->rpCommonSyntax->rpDefs) { - TargetEntry *teDefine; - Node *expr; - List *vars; - - name = restarget->name; - - foreach_node(ResTarget, r, restargets) + foreach_node(ResTarget, prior, windef->rpCommonSyntax->rpDefs) { - char *n; - - n = r->name; - - if (!strcmp(n, name)) + if (prior == restarget) + break; + if (strcmp(prior->name, restarget->name) == 0) ereport(ERROR, errcode(ERRCODE_SYNTAX_ERROR), errmsg("DEFINE variable \"%s\" appears more than once", - name), - parser_errposition(pstate, exprLocation((Node *) r))); + restarget->name), + parser_errposition(pstate, + exprLocation((Node *) restarget))); } + } - restargets = lappend(restargets, restarget); + foreach_node(ResTarget, restarget, windef->rpCommonSyntax->rpDefs) + { + TargetEntry *teDefine; + Node *expr; + List *vars; /* - * Transform the DEFINE expression. We must NOT add the whole - * expression to the query targetlist, because it may contain - * RPRNavExpr nodes (PREV/NEXT/FIRST/LAST) that can only be evaluated - * inside the owning WindowAgg. - * - * Instead, we transform the expression directly and only ensure that - * the individual Var nodes it references are present in the - * targetlist, so the planner can propagate the referenced columns. + * Transform the DEFINE expression and coerce it to boolean. We must + * NOT add the whole expression to the query targetlist, because it + * may contain RPRNavExpr nodes (PREV/NEXT/FIRST/LAST) that can only + * be evaluated inside the owning WindowAgg. Coercing here, before + * pull_var_clause, keeps pull_var_clause operating on the final + * expression form and surfaces a type mismatch before the targetlist + * is touched. */ expr = transformExpr(pstate, restarget->val, EXPR_KIND_RPR_DEFINE); + expr = coerce_to_boolean(pstate, expr, "DEFINE"); + + /* Build the defineClause entry directly from the transformed expr */ + teDefine = makeTargetEntry((Expr *) expr, + list_length(defineClause) + 1, + pstrdup(restarget->name), + true); + + /* build transformed DEFINE clause (list of TargetEntry) */ + defineClause = lappend(defineClause, teDefine); /* * Pull out Var nodes from the transformed expression and ensure each @@ -412,26 +412,9 @@ transformDefineClause(ParseState *pstate, WindowDef *windef, } } list_free(vars); - - /* Build the defineClause entry directly from the transformed expr */ - teDefine = makeTargetEntry((Expr *) expr, - list_length(defineClause) + 1, - pstrdup(name), - true); - - /* build transformed DEFINE clause (list of TargetEntry) */ - defineClause = lappend(defineClause, teDefine); } - list_free(restargets); pstate->p_rpr_pattern_vars = NIL; - /* - * Make sure that the row pattern definition search condition is a boolean - * expression. - */ - foreach_ptr(TargetEntry, te, defineClause) - te->expr = (Expr *) coerce_to_boolean(pstate, (Node *) te->expr, "DEFINE"); - /* * Validate DEFINE expressions: nested PREV/NEXT, column references, * compound flatten, volatile callees -- all in a single walk per diff --git a/src/test/regress/expected/rpr_base.out b/src/test/regress/expected/rpr_base.out index cf158e1c043..80fabde514c 100644 --- a/src/test/regress/expected/rpr_base.out +++ b/src/test/regress/expected/rpr_base.out @@ -236,7 +236,7 @@ WINDOW w AS ( ); ERROR: DEFINE variable "a" appears more than once LINE 7: DEFINE A AS id > 0, A AS id < 10 - ^ + ^ DROP TABLE rpr_dup; -- Boolean coercion CREATE TABLE rpr_bool (id INT, flag BOOLEAN); @@ -319,6 +319,82 @@ DROP CAST (truthyint AS boolean); DROP FUNCTION truthyint_to_bool(truthyint); DROP TYPE truthyint; DROP TABLE rpr_bool; +-- Coercion over a boolean domain is not a no-op; the wrapped Var must still +-- propagate when referenced only in DEFINE (flag is not in the select list) +CREATE DOMAIN boolish AS boolean; +CREATE TABLE rpr_domain (id int, flag boolish); +INSERT INTO rpr_domain VALUES (1, true), (2, false), (3, true); +SELECT id, COUNT(*) OVER w AS cnt +FROM rpr_domain +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS flag +) +ORDER BY id; + id | cnt +----+----- + 1 | 1 + 2 | 0 + 3 | 1 +(3 rows) + +DROP TABLE rpr_domain; +DROP DOMAIN boolish; +-- A Var referenced only inside a navigation operation must still propagate +-- (val appears only inside PREV(), not as a bare operand or in the select list) +CREATE TABLE rpr_nav (id int, val int); +INSERT INTO rpr_nav VALUES (1, 0), (2, 1), (3, 0), (4, 2); +SELECT id, COUNT(*) OVER w AS cnt +FROM rpr_nav +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (UP+) + DEFINE UP AS id > PREV(val) +) +ORDER BY id; + id | cnt +----+----- + 1 | 0 + 2 | 3 + 3 | 0 + 4 | 0 +(4 rows) + +DROP TABLE rpr_nav; +-- A non-boolean DEFINE expression is rejected +CREATE TABLE rpr_noncoerce (id int, n int); +INSERT INTO rpr_noncoerce VALUES (1, 1); +SELECT id, COUNT(*) OVER w AS cnt +FROM rpr_noncoerce +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS n +); +ERROR: argument of DEFINE must be type boolean, not type integer +LINE 7: DEFINE A AS n + ^ +DROP TABLE rpr_noncoerce; +-- A non-boolean later DEFINE is rejected at its own definition even when an +-- earlier DEFINE variable is valid +CREATE TABLE rpr_noncoerce2 (id int, n int); +INSERT INTO rpr_noncoerce2 VALUES (1, 1); +SELECT id, COUNT(*) OVER w AS cnt +FROM rpr_noncoerce2 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE A AS id > 0, B AS n +); +ERROR: argument of DEFINE must be type boolean, not type integer +LINE 7: DEFINE A AS id > 0, B AS n + ^ +DROP TABLE rpr_noncoerce2; -- Complex expressions CREATE TABLE rpr_complex (id INT, val1 INT, val2 INT); INSERT INTO rpr_complex VALUES (1, 10, 20), (2, 15, 25), (3, 20, 30); diff --git a/src/test/regress/sql/rpr_base.sql b/src/test/regress/sql/rpr_base.sql index e71f0dd3680..21840aa77be 100644 --- a/src/test/regress/sql/rpr_base.sql +++ b/src/test/regress/sql/rpr_base.sql @@ -261,6 +261,65 @@ DROP TYPE truthyint; DROP TABLE rpr_bool; +-- Coercion over a boolean domain is not a no-op; the wrapped Var must still +-- propagate when referenced only in DEFINE (flag is not in the select list) +CREATE DOMAIN boolish AS boolean; +CREATE TABLE rpr_domain (id int, flag boolish); +INSERT INTO rpr_domain VALUES (1, true), (2, false), (3, true); +SELECT id, COUNT(*) OVER w AS cnt +FROM rpr_domain +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS flag +) +ORDER BY id; +DROP TABLE rpr_domain; +DROP DOMAIN boolish; + +-- A Var referenced only inside a navigation operation must still propagate +-- (val appears only inside PREV(), not as a bare operand or in the select list) +CREATE TABLE rpr_nav (id int, val int); +INSERT INTO rpr_nav VALUES (1, 0), (2, 1), (3, 0), (4, 2); +SELECT id, COUNT(*) OVER w AS cnt +FROM rpr_nav +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (UP+) + DEFINE UP AS id > PREV(val) +) +ORDER BY id; +DROP TABLE rpr_nav; + +-- A non-boolean DEFINE expression is rejected +CREATE TABLE rpr_noncoerce (id int, n int); +INSERT INTO rpr_noncoerce VALUES (1, 1); +SELECT id, COUNT(*) OVER w AS cnt +FROM rpr_noncoerce +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS n +); +DROP TABLE rpr_noncoerce; + +-- A non-boolean later DEFINE is rejected at its own definition even when an +-- earlier DEFINE variable is valid +CREATE TABLE rpr_noncoerce2 (id int, n int); +INSERT INTO rpr_noncoerce2 VALUES (1, 1); +SELECT id, COUNT(*) OVER w AS cnt +FROM rpr_noncoerce2 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE A AS id > 0, B AS n +); +DROP TABLE rpr_noncoerce2; + -- Complex expressions CREATE TABLE rpr_complex (id INT, val1 INT, val2 INT); INSERT INTO rpr_complex VALUES (1, 10, 20), (2, 15, 25), (3, 20, 30); -- 2.50.1 (Apple Git-155)