From dd33dbd7ec0fe11b76d61d40e687a2b7ae6156f4 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Wed, 22 Apr 2026 17:28:23 +0300 Subject: [PATCH v2] Deduplicate RestrictInfos differing only in outer_relids in SJE During self-join elimination, an EC-derived IS NOT NULL clause from an inner join and an identical clause originating from an enclosing outer join's ON clause may both be moved to the surviving relation. They are logically equivalent but carry different outer_relids / incompatible_relids (placement constraints, not semantic content), so restrict_infos_logically_equal() used to keep both, producing a duplicated filter in the plan. Rather than enumerate the fields that participate in logical equality by hand, restrict_infos_logically_equal() now leans on the autogenerated equal() and only suppresses the few fields that may legitimately differ between two equivalent RestrictInfos: rinfo_serial (identifier only) and outer_relids / incompatible_relids (placement constraints). All other fields are either compared by equal() or already tagged pg_node_attr(equal_ignore) and skipped automatically, so a newly added semantically significant field is picked up here without changing this function. A comment near the RestrictInfo struct points back at this function for future maintainers. When a duplicate is found, merge outer_relids and incompatible_relids of the two RestrictInfos by union into the surviving one via the new merge_logically_equal_restrictinfo() helper, so the kept clause carries the strictest placement allowed by either original. Apply the same dedup-and-merge step to update_eclasses() so EC-source clauses that converge after relid substitution are handled consistently with add_non_redundant_clauses(). Reviewed-by: Tender Wang Reviewed-by: Andrei Lepikhov Bug: #19435 --- src/backend/optimizer/plan/analyzejoins.c | 99 ++++++++++++++++++++--- src/include/nodes/pathnodes.h | 7 ++ src/test/regress/expected/join.out | 20 +++++ src/test/regress/sql/join.sql | 10 +++ 4 files changed, 124 insertions(+), 12 deletions(-) diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index b07cb731401..2a96d3aaf1f 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -79,6 +79,9 @@ static bool is_innerrel_unique_for(PlannerInfo *root, static int self_join_candidates_cmp(const void *a, const void *b); static bool replace_relid_callback(Node *node, ChangeVarNodes_context *context); +static bool restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b); +static void merge_logically_equal_restrictinfo(RestrictInfo *dst, + RestrictInfo *src); /* @@ -1645,14 +1648,21 @@ update_eclasses(EquivalenceClass *ec, int from, int to) * After switching the clause to the remaining relation, check it for * redundancy with existing ones. We don't have to check for * redundancy with derived clauses, because we've just deleted them. + * + * Use restrict_infos_logically_equal() so the comparison stays in + * sync with add_non_redundant_clauses(): equal clauses that differ + * only in their outer_relids / incompatible_relids are deduplicated + * and the placement constraints of the dropped clause are merged into + * the survivor. */ foreach_node(RestrictInfo, other, new_sources) { if (!equal(rinfo->clause_relids, other->clause_relids)) continue; - if (equal(rinfo->clause, other->clause)) + if (restrict_infos_logically_equal(rinfo, other)) { + merge_logically_equal_restrictinfo(other, rinfo); is_redundant = true; break; } @@ -1668,27 +1678,79 @@ update_eclasses(EquivalenceClass *ec, int from, int to) } /* - * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field, - * which makes almost every RestrictInfo unique. This type of comparison is - * useful when removing duplicates while moving RestrictInfo's from removed - * relation to remaining relation during self-join elimination. + * Compare two RestrictInfos for logical equivalence during self-join + * elimination dedup. Two clauses are logically equivalent if they apply + * the same test at the same semantic placement level, i.e. their clause + * content and the fields that determine when/where the filter must run + * all match. + * + * Rather than enumerate the fields to compare, we lean on the + * autogenerated equal() function and only override the few fields that + * may legitimately differ between two equivalent RestrictInfos: + * + * - rinfo_serial is only an identifier and does not affect semantics; * - * XXX: In the future, we might remove the 'rinfo_serial' field completely and - * get rid of this function. + * - outer_relids and incompatible_relids are placement *constraints* + * rather than content. The same logical filter can arise from + * different levels of the join tree (e.g. an EC-derived IS NOT NULL + * from an inner join vs. an original ON-clause from an enclosing + * outer join) with different constraint sets. Callers must union + * these constraints when treating one clause as redundant with the + * other, so the surviving clause carries the strictest placement + * allowed by either original. See add_non_redundant_clauses() and + * update_eclasses(). + * + * All other fields are either compared by equal() or already marked + * pg_node_attr(equal_ignore) in the RestrictInfo struct definition (and + * thus skipped automatically), so a new semantically significant field + * added to RestrictInfo will be picked up here without any change. If + * the new field is itself a placement constraint that may legitimately + * differ between equivalent clauses, extend the save/restore list below + * and update the union step in the callers; the comment near + * RestrictInfo in pathnodes.h points back here. */ static bool restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b) { - int saved_rinfo_serial = a->rinfo_serial; + int saved_rinfo_serial_a = a->rinfo_serial; + int saved_rinfo_serial_b = b->rinfo_serial; + Relids saved_outer_relids_a = a->outer_relids; + Relids saved_outer_relids_b = b->outer_relids; + Relids saved_incompatible_relids_a = a->incompatible_relids; + Relids saved_incompatible_relids_b = b->incompatible_relids; bool result; - a->rinfo_serial = b->rinfo_serial; + a->rinfo_serial = b->rinfo_serial = 0; + a->outer_relids = b->outer_relids = NULL; + a->incompatible_relids = b->incompatible_relids = NULL; + result = equal(a, b); - a->rinfo_serial = saved_rinfo_serial; + + a->rinfo_serial = saved_rinfo_serial_a; + b->rinfo_serial = saved_rinfo_serial_b; + a->outer_relids = saved_outer_relids_a; + b->outer_relids = saved_outer_relids_b; + a->incompatible_relids = saved_incompatible_relids_a; + b->incompatible_relids = saved_incompatible_relids_b; return result; } +/* + * Merge placement constraints (outer_relids and incompatible_relids) of + * 'src' into 'dst' when 'src' is being dropped as redundant with 'dst'. + * Union is the strictest common constraint: any placement allowed by both + * originals remains allowed, so keeping only the merged clause reproduces + * the same filtering without relaxing any prohibition. + */ +static void +merge_logically_equal_restrictinfo(RestrictInfo *dst, RestrictInfo *src) +{ + dst->outer_relids = bms_union(dst->outer_relids, src->outer_relids); + dst->incompatible_relids = bms_union(dst->incompatible_relids, + src->incompatible_relids); +} + /* * This function adds all non-redundant clauses to the keeping relation * during self-join elimination. That is a contradictory operation. On the @@ -1699,6 +1761,13 @@ restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b) * would have been better to avoid calling the equal() function here, but * it's the only way to detect duplicated inequality expressions. * + * When a candidate is found to be logically equivalent to an already-kept + * clause but carries different outer_relids / incompatible_relids, those + * placement-constraint sets are merged by union into the surviving + * clause. The union is the strictest common constraint: any placement + * allowed by both originals remains allowed, so keeping only the merged + * clause reproduces the same filtering without relaxing any prohibition. + * * (*keep_rinfo_list) is given by pointer because it might be altered by * distribute_restrictinfo_to_rels(). */ @@ -1722,9 +1791,15 @@ add_non_redundant_clauses(PlannerInfo *root, if (src == rinfo || (rinfo->parent_ec != NULL && - src->parent_ec == rinfo->parent_ec) || - restrict_infos_logically_equal(rinfo, src)) + src->parent_ec == rinfo->parent_ec)) + { + is_redundant = true; + break; + } + + if (restrict_infos_logically_equal(rinfo, src)) { + merge_logically_equal_restrictinfo(src, rinfo); is_redundant = true; break; } diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 27a2c6815b7..b47a632c277 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -2889,6 +2889,13 @@ typedef struct LimitPath * * parent_ec, left_ec, right_ec are not printed, lest it lead to infinite * recursion in plan tree dump. + * + * If you add a new field to RestrictInfo that affects semantic or + * placement-level equality, also revisit restrict_infos_logically_equal() + * and the dedup callers that rely on it (add_non_redundant_clauses, + * update_eclasses). Fields that are caches or otherwise irrelevant to + * equality should be tagged pg_node_attr(equal_ignore) so they are skipped + * automatically. */ typedef struct RestrictInfo diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 78bf022f7b4..2635e26afd8 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -8147,6 +8147,26 @@ SELECT 1 AS c1 FROM sl sl1 LEFT JOIN (sl AS sl2 NATURAL JOIN sl AS sl3) -> Seq Scan on sl sl4 (7 rows) +-- An EC-derived IS NOT NULL (from the NATURAL JOIN's self-join removal) may +-- collide with an identical IS NOT NULL originating from an enclosing outer +-- join's ON clause. The two clauses differ only in outer_relids, so they +-- must be merged (by union) rather than kept as duplicates. Bug #19435. +EXPLAIN (COSTS OFF) +SELECT 1 AS c1 FROM (sl AS sl0 RIGHT JOIN + ((sl AS sl1 NATURAL JOIN sl AS sl2) + RIGHT JOIN sl AS sl3 ON sl1.bool_col IS NOT NULL) + ON sl1.bool_col); + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Nested Loop Left Join + -> Seq Scan on sl sl3 + -> Nested Loop Left Join + Join Filter: sl2.bool_col + -> Seq Scan on sl sl2 + Filter: ((bool_col IS NOT NULL) AND (a IS NOT NULL) AND (b IS NOT NULL) AND (c IS NOT NULL)) + -> Seq Scan on sl sl0 +(7 rows) + -- Check optimization disabling if it will violate special join conditions. -- Two identical joined relations satisfies self join removal conditions but -- stay in different special join infos. diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index fae19113cef..89c54a6e172 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -3185,6 +3185,16 @@ EXPLAIN (COSTS OFF) SELECT 1 AS c1 FROM sl sl1 LEFT JOIN (sl AS sl2 NATURAL JOIN sl AS sl3) ON sl2.bool_col LEFT JOIN sl AS sl4 ON sl2.bool_col; +-- An EC-derived IS NOT NULL (from the NATURAL JOIN's self-join removal) may +-- collide with an identical IS NOT NULL originating from an enclosing outer +-- join's ON clause. The two clauses differ only in outer_relids, so they +-- must be merged (by union) rather than kept as duplicates. Bug #19435. +EXPLAIN (COSTS OFF) +SELECT 1 AS c1 FROM (sl AS sl0 RIGHT JOIN + ((sl AS sl1 NATURAL JOIN sl AS sl2) + RIGHT JOIN sl AS sl3 ON sl1.bool_col IS NOT NULL) + ON sl1.bool_col); + -- Check optimization disabling if it will violate special join conditions. -- Two identical joined relations satisfies self join removal conditions but -- stay in different special join infos. -- 2.39.5 (Apple Git-154)