public inbox for [email protected]
help / color / mirror / Atom feedFrom: Alena Rybakina <[email protected]>
To: Ilia Evdokimov <[email protected]>
Cc: Ranier Vilela <[email protected]>
Cc: PostgreSQL Hackers <[email protected]>
Cc: Peter Petrov <[email protected]>
Cc: David Rowley <[email protected]>
Subject: Re: pull-up subquery if JOIN-ON contains refs to upper-query
Date: Wed, 3 Sep 2025 00:20:46 +0300
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
References: <[email protected]>
<CAEudQAoD707uh5Pjpg5NMyF-QO=fzajA+BmtcoqQAeXN1C+TkQ@mail.gmail.com>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
Hi! Thank you for your attention to this patch!
On 03.09.2025 00:07, Ilia Evdokimov wrote:
> On 04.06.2025 13:40, Alena Rybakina wrote:
>>
>> Do you have any ideas on how to solve this problem? So far, the only
>> approach I see is to try an alternative plan but I'm still learning this.
>>
> Hi,
>
> I've reviewed this patch, and I have suggestion about the approach.
>
> Currently, the patch extends 'convert_EXISTS_sublick_to_join' with
> quite complex logic (clause collection, volatile checks, rewriting
> join quals, etc). While it works, the amount of branching and special
> cases makes the function harder to follow.
>
> Looking at the logic, it seems that a large part of the complexity
> comes from trying to directly adapt 'convert_EXISTS_sublink_to_join'
> instead of factoring out a dedicated path. An alternative would be to
> introduce a separate function
> *'convert_EXISTS_sublink_to_lateral_join' *- with a similar API to
> 'convert_ANY_sublink_to_join'. Such a function can focus only on the
> EXISTS-to-join case, while keeping the existing function shorter and
> easier to reason about.
>
> I even made some first rough sketches of this approach (not a finished
> patch, just an outline). Of course, it would still need proper
> adaptation, but I think it demonstrates that the overall structure can
> be kept simpler.
>
> What do you think about refactoring in this direction?
>
> --
> Best regards,
> Ilia Evdokimov,
> Tantor Labs LLC,
> https://tantorlabs.com
>
I'll look at this. I need some time to consider it.
I'm working on this approach right now. I introduced mutator and made
the transformation if it is possible there but I need to fix some bugs.
Attachments:
[text/x-patch] pull_up.diff (53.3K, 3-pull_up.diff)
download | inline diff:
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index d71ed958e31..0d2820ca79f 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1439,6 +1439,239 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
return result;
}
+typedef struct HoistJoinQualsContext
+{
+ List *outer_clauses; /* collect hoisted clauses */
+ Relids observed_nulltest_vars;
+} HoistJoinQualsContext;
+
+static Node *
+preprocess_quals(Node *node)
+{
+ /*
+ * Run const-folding without planner context.
+ *
+ * IMPORTANT: Pass NULL as PlannerInfo here because we’re simplifying
+ * a *subquery’s* quals before its rtable has been merged with the
+ * parent. If we passed a non-NULL root, eval_const_expressions()
+ * could perform root-dependent transforms (e.g., fold NullTest on Var
+ * using var_is_nonnullable) against the *wrong* rangetable, risking
+ * out-of-bounds RTE access. See eval_const_expressions()’s contract:
+ * “root can be passed as NULL …” for exactly this use-case.
+ */
+ node = eval_const_expressions(NULL, node);
+ node = (Node *) canonicalize_qual((Expr *) node, false);
+
+ node = (Node *) make_ands_implicit((Expr *) node);
+
+ return node;
+}
+
+static NullTest *
+make_nulltest(Var *var, NullTestType type)
+{
+ NullTest *nulltest = makeNode(NullTest);
+ nulltest->arg = (Expr *) var;
+ nulltest->nulltesttype = type;
+ nulltest->argisrow = false;
+ nulltest->location = -1;
+
+ return nulltest;
+}
+
+static bool
+simplicity_check_walker(Node *node, void *ctx)
+{
+ if (node == NULL)
+ {
+ return false;
+ }
+ else if(IsA(node, Var))
+ return true;
+ else if(IsA(node, Query))
+ return query_tree_walker((Query *) node,
+ simplicity_check_walker,
+ (void*) ctx,
+ QTW_EXAMINE_RTES_BEFORE);
+
+ return expression_tree_walker(node, simplicity_check_walker,
+ (void *) ctx);
+}
+
+static List *
+generate_not_null_exprs(List *list_expr, Relids *observed_vars)
+{
+ ListCell *lc;
+ List *result = NIL;
+
+ foreach(lc, list_expr)
+ {
+ Node *node = (Node *) lfirst(lc);
+
+ if (IsA(node, OpExpr))
+ {
+ Node *larg = get_leftop(node);
+ Node *rarg = get_rightop(node);
+
+ if (IsA(larg, RelabelType))
+ larg = (Node *) ((RelabelType *) larg)->arg;
+
+ if (IsA(rarg, RelabelType))
+ rarg = (Node *) ((RelabelType *) rarg)->arg;
+
+ if(IsA(larg, Var))
+ {
+ Var *var = (Var *) larg;
+ if (!bms_is_member(var->varno, *observed_vars) && var->varlevelsup == 1)
+ {
+ NullTest *nulltest = make_nulltest(var, IS_NOT_NULL);
+ result = lappend(result, nulltest);
+ *observed_vars = bms_add_member(*observed_vars, var->varno);
+ continue;
+ }
+ }
+
+ if(IsA(rarg, Var))
+ {
+ Var *var = (Var *) rarg;
+ if (!bms_is_member(var->varno, *observed_vars) && var->varlevelsup == 1)
+ {
+ NullTest *nulltest = make_nulltest(var, IS_NOT_NULL);
+ result = lappend(result, nulltest);
+ *observed_vars = bms_add_member(*observed_vars, var->varno);
+ continue;
+ }
+ }
+ }
+ }
+
+ return result;
+}
+
+static Node *
+hoist_parent_quals_jointree_mutator(Node *jtnode, HoistJoinQualsContext *context)
+{
+ if (jtnode == NULL)
+ return NULL;
+
+ if (IsA(jtnode, RangeTblRef))
+ return jtnode; /* nothing to change */
+
+ if (IsA(jtnode, JoinExpr))
+ {
+ JoinExpr *j = (JoinExpr *) jtnode;
+ JoinExpr *newj = makeNode(JoinExpr);
+ ListCell *lc;
+ List *join_clauses = NIL;
+ Node *qual;
+ memcpy(newj, j, sizeof(JoinExpr));
+
+ /* Recurse into join inputs */
+ newj->larg = (Node *) hoist_parent_quals_jointree_mutator(j->larg, context);
+ newj->rarg = (Node *) hoist_parent_quals_jointree_mutator(j->rarg, context);
+
+ if(contain_volatile_functions(newj->quals) ||
+ newj->larg == NULL ||
+ newj->rarg == NULL)
+ return NULL;
+
+ qual = newj->quals;
+ qual = preprocess_quals(qual);
+
+ foreach(lc, (List *) qual)
+ {
+ Node *node = (Node *) lfirst(lc);
+
+ if (IsA(node, OpExpr))
+ {
+ if(simplicity_check_walker(get_leftop(node), NULL) &&
+ simplicity_check_walker(get_rightop(node), NULL))
+ {
+ join_clauses = lappend(join_clauses, node);
+ continue;
+ }
+ }
+ context->outer_clauses = lappend(context->outer_clauses, node);
+ }
+
+ /* Only touch INNER JOINs */
+ if ((j->jointype != JOIN_LEFT &&
+ j->jointype != JOIN_RIGHT &&
+ j->jointype != JOIN_FULL)) /* subquery vars */
+ {
+ List *null_tests;
+
+ if (join_clauses == NIL) /* subquery vars */
+ {
+ newj->quals = (Node *) makeBoolConst(true, false);
+ }
+ else if(join_clauses != NIL && contain_vars_of_level((Node *) join_clauses, 1))
+ {
+ null_tests = generate_not_null_exprs(join_clauses, &context->observed_nulltest_vars);
+ context->outer_clauses = list_concat(context->outer_clauses, null_tests);
+ context->outer_clauses = list_concat(context->outer_clauses, join_clauses);
+ newj->quals = (Node *) makeBoolConst(true, false);
+ }
+ else
+ {
+ newj->quals = (Node *) make_ands_explicit(join_clauses);
+ }
+ }
+ else
+ {
+ if (contain_vars_of_level(j->quals, 1))
+ return NULL;
+ }
+
+ return (Node *) newj;
+ }
+
+ if (IsA(jtnode, FromExpr))
+ {
+ FromExpr *f = (FromExpr *) jtnode;
+ FromExpr *newf = makeNode(FromExpr);
+ ListCell *lc;
+ List *fromlist = NIL;
+
+ /* Recurse into fromlist */
+ memcpy(newf, f, sizeof(FromExpr));
+
+ /*
+ * Process children, if any of their jointree contains Vars of the
+ * parent query or quals of their JoinExpr contains volatile functions
+ * then exit
+ */
+ foreach(lc, newf->fromlist)
+ {
+ Node *fnode = hoist_parent_quals_jointree_mutator(lfirst(lc), context);
+
+ if (fnode == NULL)
+ return NULL;
+ fromlist = lappend(fromlist, fnode);
+ }
+
+ newf->fromlist = fromlist;
+
+ if(contain_volatile_functions(newf->quals))
+ return NULL;
+
+ if(newf->quals)
+ {
+ Node *qual = newf->quals;
+ /* Quals (WHERE clause) may still contain sublinks etc */
+ qual = preprocess_quals(qual);
+ context->outer_clauses = list_concat(context->outer_clauses, (List *) qual);
+ newf->quals = NULL;
+ }
+
+ return (Node *) newf;
+ }
+
+ return jtnode; /* quiet compiler */
+}
+
+bool pull_up_with_joins = true;
+
/*
* convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join
*
@@ -1453,12 +1686,13 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
JoinExpr *result;
Query *parse = root->parse;
Query *subselect = (Query *) sublink->subselect;
- Node *whereClause;
+ Node *whereClause = NULL;
PlannerInfo subroot;
int rtoffset;
int varno;
Relids clause_varnos;
Relids upper_varnos;
+ List *newWhere = NIL;
Assert(sublink->subLinkType == EXISTS_SUBLINK);
@@ -1488,145 +1722,269 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
if (!simplify_EXISTS_query(root, subselect))
return NULL;
- /*
- * Separate out the WHERE clause. (We could theoretically also remove
- * top-level plain JOIN/ON clauses, but it's probably not worth the
- * trouble.)
- */
- whereClause = subselect->jointree->quals;
- subselect->jointree->quals = NULL;
- /*
- * The rest of the sub-select must not refer to any Vars of the parent
- * query. (Vars of higher levels should be okay, though.)
- */
- if (contain_vars_of_level((Node *) subselect, 1))
- return NULL;
+ if(pull_up_with_joins)
+ {
+ HoistJoinQualsContext hjq_context = {NIL, NULL};
- /*
- * On the other hand, the WHERE clause must contain some Vars of the
- * parent query, else it's not gonna be a join.
- */
- if (!contain_vars_of_level(whereClause, 1))
- return NULL;
+ subselect->jointree = (FromExpr * ) hoist_parent_quals_jointree_mutator((Node *) subselect->jointree, &hjq_context);
- /*
- * We don't risk optimizing if the WHERE clause is volatile, either.
- */
- if (contain_volatile_functions(whereClause))
- return NULL;
+ if(subselect->jointree == NULL || hjq_context.outer_clauses == NIL)
+ return NULL;
- /*
- * Scan the rangetable for relation RTEs and retrieve the necessary
- * catalog information for each relation. Using this information, clear
- * the inh flag for any relation that has no children, collect not-null
- * attribute numbers for any relation that has column not-null
- * constraints, and expand virtual generated columns for any relation that
- * contains them.
- *
- * Note: we construct up an entirely dummy PlannerInfo for use here. This
- * is fine because only the "glob" and "parse" links will be used in this
- * case.
- *
- * Note: we temporarily assign back the WHERE clause so that any virtual
- * generated column references within it can be expanded. It should be
- * separated out again afterward.
- */
- MemSet(&subroot, 0, sizeof(subroot));
- subroot.type = T_PlannerInfo;
- subroot.glob = root->glob;
- subroot.parse = subselect;
- subselect->jointree->quals = whereClause;
- subselect = preprocess_relation_rtes(&subroot);
+ newWhere = hjq_context.outer_clauses;
- /*
- * Now separate out the WHERE clause again.
- */
- whereClause = subselect->jointree->quals;
- subselect->jointree->quals = NULL;
+ bms_free(hjq_context.observed_nulltest_vars);
- /*
- * The subquery must have a nonempty jointree, but we can make it so.
- */
- replace_empty_jointree(subselect);
- /*
- * Prepare to pull up the sub-select into top range table.
- *
- * We rely here on the assumption that the outer query has no references
- * to the inner (necessarily true). Therefore this is a lot easier than
- * what pull_up_subqueries has to go through.
- *
- * In fact, it's even easier than what convert_ANY_sublink_to_join has to
- * do. The machinations of simplify_EXISTS_query ensured that there is
- * nothing interesting in the subquery except an rtable and jointree, and
- * even the jointree FromExpr no longer has quals. So we can just append
- * the rtable to our own and use the FromExpr in our jointree. But first,
- * adjust all level-zero varnos in the subquery to account for the rtable
- * merger.
- */
- rtoffset = list_length(parse->rtable);
- OffsetVarNodes((Node *) subselect, rtoffset, 0);
- OffsetVarNodes(whereClause, rtoffset, 0);
+ /*
+ * The subquery must have a nonempty jointree, but we can make it so.
+ */
+ replace_empty_jointree(subselect);
- /*
- * Upper-level vars in subquery will now be one level closer to their
- * parent than before; in particular, anything that had been level 1
- * becomes level zero.
- */
- IncrementVarSublevelsUp((Node *) subselect, -1, 1);
- IncrementVarSublevelsUp(whereClause, -1, 1);
+ /*
+ * Prepare to pull up the sub-select into top range table.
+ *
+ * We rely here on the assumption that the outer query has no references
+ * to the inner (necessarily true). Therefore this is a lot easier than
+ * what pull_up_subqueries has to go through.
+ *
+ * In fact, it's even easier than what convert_ANY_sublink_to_join has to
+ * do. The machinations of simplify_EXISTS_query ensured that there is
+ * nothing interesting in the subquery except an rtable and jointree, and
+ * even the jointree FromExpr no longer has quals. So we can just append
+ * the rtable to our own and use the FromExpr in our jointree. But first,
+ * adjust all level-zero varnos in the subquery to account for the rtable
+ * merger.
+ */
+ rtoffset = list_length(parse->rtable);
+ OffsetVarNodes((Node *) subselect, rtoffset, 0);
- /*
- * Now that the WHERE clause is adjusted to match the parent query
- * environment, we can easily identify all the level-zero rels it uses.
- * The ones <= rtoffset belong to the upper query; the ones > rtoffset do
- * not.
- */
- clause_varnos = pull_varnos(root, whereClause);
- upper_varnos = NULL;
- varno = -1;
- while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
- {
- if (varno <= rtoffset)
- upper_varnos = bms_add_member(upper_varnos, varno);
- }
- bms_free(clause_varnos);
- Assert(!bms_is_empty(upper_varnos));
+ /*
+ * Upper-level vars in subquery will now be one level closer to their
+ * parent than before; in particular, anything that had been level 1
+ * becomes level zero.
+ */
+ IncrementVarSublevelsUp((Node *) subselect, -1, 1);
- /*
- * Now that we've got the set of upper-level varnos, we can make the last
- * check: only available_rels can be referenced.
- */
- if (!bms_is_subset(upper_varnos, available_rels))
- return NULL;
+ OffsetVarNodes((Node *) newWhere, rtoffset, 0);
+ IncrementVarSublevelsUp((Node *) newWhere, -1, 1);
- /*
- * Now we can attach the modified subquery rtable to the parent. This also
- * adds subquery's RTEPermissionInfos into the upper query.
- */
- CombineRangeTables(&parse->rtable, &parse->rteperminfos,
- subselect->rtable, subselect->rteperminfos);
+ /*
+ * Now that the WHERE clause is adjusted to match the parent query
+ * environment, we can easily identify all the level-zero rels it uses.
+ * The ones <= rtoffset belong to the upper query; the ones > rtoffset do
+ * not.
+ */
+ clause_varnos = pull_varnos(root, (Node *) newWhere);
+ upper_varnos = NULL;
+ varno = -1;
+ while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
+ {
+ if (varno <= rtoffset)
+ upper_varnos = bms_add_member(upper_varnos, varno);
+ }
+ bms_free(clause_varnos);
- /*
- * And finally, build the JoinExpr node.
- */
- result = makeNode(JoinExpr);
- result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
- result->isNatural = false;
- result->larg = NULL; /* caller must fill this in */
- /* flatten out the FromExpr node if it's useless */
- if (list_length(subselect->jointree->fromlist) == 1)
- result->rarg = (Node *) linitial(subselect->jointree->fromlist);
+ /*
+ * Now that we've got the set of upper-level varnos, we can make the last
+ * check: only available_rels can be referenced.
+ */
+ if (!bms_is_empty(upper_varnos) && !bms_is_subset(upper_varnos, available_rels))
+ return NULL;
+
+ /*
+ * Now we can attach the modified subquery rtable to the parent. This also
+ * adds subquery's RTEPermissionInfos into the upper query.
+ */
+ CombineRangeTables(&parse->rtable, &parse->rteperminfos,
+ subselect->rtable, subselect->rteperminfos);
+
+ /*
+ * And finally, build the JoinExpr node.
+ */
+ result = makeNode(JoinExpr);
+ result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
+ result->isNatural = false;
+ result->larg = NULL; /* caller must fill this in */
+ /* flatten out the FromExpr node if it's useless */
+ if (list_length(subselect->jointree->fromlist) == 1)
+ result->rarg = (Node *) linitial(subselect->jointree->fromlist);
+ else
+ result->rarg = (Node *) subselect->jointree;
+ result->usingClause = NIL;
+ result->join_using_alias = NULL;
+ result->quals = (Node *) make_ands_explicit(newWhere);
+ result->alias = NULL;
+ result->rtindex = 0; /* we don't need an RTE for it */
+
+ /*
+ * Scan the rangetable for relation RTEs and retrieve the necessary
+ * catalog information for each relation. Using this information, clear
+ * the inh flag for any relation that has no children, collect not-null
+ * attribute numbers for any relation that has column not-null
+ * constraints, and expand virtual generated columns for any relation that
+ * contains them.
+ *
+ * Note: we construct up an entirely dummy PlannerInfo for use here. This
+ * is fine because only the "glob" and "parse" links will be used in this
+ * case.
+ *
+ * Note: we temporarily assign back the WHERE clause so that any virtual
+ * generated column references within it can be expanded. It should be
+ * separated out again afterward.
+ */
+ MemSet(&subroot, 0, sizeof(subroot));
+ subroot.type = T_PlannerInfo;
+ subroot.glob = root->glob;
+ subroot.parse = subselect;
+ subselect->jointree->quals = result->quals;
+ subselect = preprocess_relation_rtes(&subroot);
+
+ return result;
+ }
else
- result->rarg = (Node *) subselect->jointree;
- result->usingClause = NIL;
- result->join_using_alias = NULL;
- result->quals = whereClause;
- result->alias = NULL;
- result->rtindex = 0; /* we don't need an RTE for it */
+ {
+ /*
+ * Separate out the WHERE clause. (We could theoretically also remove
+ * top-level plain JOIN/ON clauses, but it's probably not worth the
+ * trouble.)
+ */
+ whereClause = subselect->jointree->quals;
+ subselect->jointree->quals = NULL;
- return result;
+ /*
+ * The rest of the sub-select must not refer to any Vars of the parent
+ * query. (Vars of higher levels should be okay, though.)
+ */
+ if (contain_vars_of_level((Node *) subselect, 1))
+ return NULL;
+
+ /*
+ * On the other hand, the WHERE clause must contain some Vars of the
+ * parent query, else it's not gonna be a join.
+ */
+ if (!contain_vars_of_level(whereClause, 1))
+ return NULL;
+
+ /*
+ * We don't risk optimizing if the WHERE clause is volatile, either.
+ */
+ if (contain_volatile_functions(whereClause))
+ return NULL;
+
+ /*
+ * Scan the rangetable for relation RTEs and retrieve the necessary
+ * catalog information for each relation. Using this information, clear
+ * the inh flag for any relation that has no children, collect not-null
+ * attribute numbers for any relation that has column not-null
+ * constraints, and expand virtual generated columns for any relation that
+ * contains them.
+ *
+ * Note: we construct up an entirely dummy PlannerInfo for use here. This
+ * is fine because only the "glob" and "parse" links will be used in this
+ * case.
+ *
+ * Note: we temporarily assign back the WHERE clause so that any virtual
+ * generated column references within it can be expanded. It should be
+ * separated out again afterward.
+ */
+ MemSet(&subroot, 0, sizeof(subroot));
+ subroot.type = T_PlannerInfo;
+ subroot.glob = root->glob;
+ subroot.parse = subselect;
+ subselect->jointree->quals = whereClause;
+ subselect = preprocess_relation_rtes(&subroot);
+
+ /*
+ * Now separate out the WHERE clause again.
+ */
+ whereClause = subselect->jointree->quals;
+ subselect->jointree->quals = NULL;
+
+ /*
+ * The subquery must have a nonempty jointree, but we can make it so.
+ */
+ replace_empty_jointree(subselect);
+
+ /*
+ * Prepare to pull up the sub-select into top range table.
+ *
+ * We rely here on the assumption that the outer query has no references
+ * to the inner (necessarily true). Therefore this is a lot easier than
+ * what pull_up_subqueries has to go through.
+ *
+ * In fact, it's even easier than what convert_ANY_sublink_to_join has to
+ * do. The machinations of simplify_EXISTS_query ensured that there is
+ * nothing interesting in the subquery except an rtable and jointree, and
+ * even the jointree FromExpr no longer has quals. So we can just append
+ * the rtable to our own and use the FromExpr in our jointree. But first,
+ * adjust all level-zero varnos in the subquery to account for the rtable
+ * merger.
+ */
+ rtoffset = list_length(parse->rtable);
+ OffsetVarNodes((Node *) subselect, rtoffset, 0);
+ OffsetVarNodes(whereClause, rtoffset, 0);
+
+ /*
+ * Upper-level vars in subquery will now be one level closer to their
+ * parent than before; in particular, anything that had been level 1
+ * becomes level zero.
+ */
+ IncrementVarSublevelsUp((Node *) subselect, -1, 1);
+ IncrementVarSublevelsUp(whereClause, -1, 1);
+
+ /*
+ * Now that the WHERE clause is adjusted to match the parent query
+ * environment, we can easily identify all the level-zero rels it uses.
+ * The ones <= rtoffset belong to the upper query; the ones > rtoffset do
+ * not.
+ */
+ clause_varnos = pull_varnos(root, whereClause);
+ upper_varnos = NULL;
+ varno = -1;
+ while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
+ {
+ if (varno <= rtoffset)
+ upper_varnos = bms_add_member(upper_varnos, varno);
+ }
+ bms_free(clause_varnos);
+ Assert(!bms_is_empty(upper_varnos));
+
+ /*
+ * Now that we've got the set of upper-level varnos, we can make the last
+ * check: only available_rels can be referenced.
+ */
+ if (!bms_is_subset(upper_varnos, available_rels))
+ return NULL;
+
+ /*
+ * Now we can attach the modified subquery rtable to the parent. This also
+ * adds subquery's RTEPermissionInfos into the upper query.
+ */
+ CombineRangeTables(&parse->rtable, &parse->rteperminfos,
+ subselect->rtable, subselect->rteperminfos);
+
+ /*
+ * And finally, build the JoinExpr node.
+ */
+ result = makeNode(JoinExpr);
+ result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
+ result->isNatural = false;
+ result->larg = NULL; /* caller must fill this in */
+ /* flatten out the FromExpr node if it's useless */
+ if (list_length(subselect->jointree->fromlist) == 1)
+ result->rarg = (Node *) linitial(subselect->jointree->fromlist);
+ else
+ result->rarg = (Node *) subselect->jointree;
+ result->usingClause = NIL;
+ result->join_using_alias = NULL;
+ result->quals = whereClause;
+ result->alias = NULL;
+ result->rtindex = 0; /* we don't need an RTE for it */
+
+ return result;
+ }
}
/*
@@ -2959,7 +3317,7 @@ finalize_plan(PlannerInfo *root, Plan *plan,
break;
default:
- elog(ERROR, "unrecognized node type: %d",
+ elog(PANIC, "unrecognized node type: %d",
(int) nodeTag(plan));
}
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index f137129209f..0a6ea94c320 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1060,6 +1060,17 @@ struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
+ {
+ {"pull_up_with_joins", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("Enables the planner's use of exists pull-up with join expressions."),
+ NULL,
+ GUC_EXPLAIN
+ },
+ &pull_up_with_joins,
+ true,
+ NULL, NULL, NULL
+ },
+
{
{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index a9d8293474a..c43f2a1a2d2 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -427,6 +427,7 @@
#enable_tidscan = on
#enable_group_by_reordering = on
#enable_distinct_reordering = on
+#pull_up_with_joins = on
#enable_self_join_elimination = on
# - Planner Cost Constants -
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index 37bc13c2cbd..fa8db1362c7 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -112,6 +112,7 @@ typedef enum
extern PGDLLIMPORT int debug_parallel_query;
extern PGDLLIMPORT bool parallel_leader_participation;
extern PGDLLIMPORT bool enable_distinct_reordering;
+extern PGDLLIMPORT bool pull_up_with_joins;
extern struct PlannedStmt *planner(Query *parse, const char *query_string,
int cursorOptions,
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 0563d0cd5a1..e2d78bc80b2 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1019,6 +1019,802 @@ where exists (
where road.name = ss.f1 );
rollback;
--
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+);
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta
+ -> Nested Loop
+ -> Index Only Scan using tb_pkey on tb
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tc
+(6 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tc.id
+);
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta
+ -> Nested Loop
+ -> Index Only Scan using tc_pkey on tc
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tb
+(6 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON 1 = 1
+ WHERE ta.id = tc.id
+);
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta
+ -> Nested Loop
+ -> Index Only Scan using tc_pkey on tc
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tb
+(6 rows)
+
+-- Join compound expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tc.id
+ AND ta.id = tb.id
+);
+ QUERY PLAN
+------------------------------------
+ Hash Right Semi Join
+ Hash Cond: (tc.id = ta.id)
+ -> Hash Join
+ Hash Cond: (tb.id = tc.id)
+ -> Seq Scan on tb
+ -> Hash
+ -> Seq Scan on tc
+ -> Hash
+ -> Seq Scan on ta
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON tb.aval = tc.aid
+ AND tb.aval = ta1.id
+);
+ QUERY PLAN
+----------------------------------------------------
+ Hash Join
+ Hash Cond: (ta1.id = tb.aval)
+ -> Seq Scan on ta ta1
+ -> Hash
+ -> HashAggregate
+ Group Key: tb.aval
+ -> Merge Join
+ Merge Cond: (tb.aval = tc.aid)
+ -> Sort
+ Sort Key: tb.aval
+ -> Seq Scan on tb
+ -> Sort
+ Sort Key: tc.aid
+ -> Seq Scan on tc
+(14 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+JOIN tb ON true
+WHERE EXISTS (
+ SELECT 1
+ FROM tb tb1
+ JOIN tc ON ta.id = tb.id
+);
+ QUERY PLAN
+------------------------------------
+ Nested Loop Semi Join
+ -> Hash Join
+ Hash Cond: (ta.id = tb.id)
+ -> Seq Scan on ta
+ -> Hash
+ -> Seq Scan on tb
+ -> Nested Loop
+ -> Seq Scan on tb tb1
+ -> Materialize
+ -> Seq Scan on tc
+(10 rows)
+
+-- Compound expression with const type or other type of expressions
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tc.id
+ AND ta.id = 1
+);
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Semi Join
+ -> Index Only Scan using ta_pkey on ta
+ Index Cond: (id = 1)
+ -> Nested Loop
+ -> Index Only Scan using tc_pkey on tc
+ Index Cond: (id = 1)
+ -> Seq Scan on tb
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tc.id
+ AND tb.id = 1
+);
+ QUERY PLAN
+-------------------------------------------------
+ Hash Right Semi Join
+ Hash Cond: (tc.id = ta.id)
+ -> Nested Loop
+ -> Index Only Scan using tb_pkey on tb
+ Index Cond: (id = 1)
+ -> Seq Scan on tc
+ -> Hash
+ -> Seq Scan on ta
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ RIGHT JOIN tc ON ta.id = tc.id
+ WHERE ta.val = 1
+);
+ QUERY PLAN
+----------------------------------------------
+ Seq Scan on ta
+ Filter: EXISTS(SubPlan 1)
+ SubPlan 1
+ -> Result
+ One-Time Filter: (ta.val = 1)
+ -> Nested Loop Left Join
+ Join Filter: (ta.id = tc.id)
+ -> Seq Scan on tc
+ -> Materialize
+ -> Seq Scan on tb
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND tb.aval = ANY ('{1}'::int[])
+);
+ QUERY PLAN
+-------------------------------------------------------------------------
+ Hash Join
+ Hash Cond: (ta.id = tb.id)
+ -> Seq Scan on ta
+ -> Hash
+ -> HashAggregate
+ Group Key: tb.id
+ -> Nested Loop
+ -> Seq Scan on tc
+ -> Materialize
+ -> Seq Scan on tb
+ Filter: (aval = ANY ('{1}'::integer[]))
+(11 rows)
+
+-- Exists SubLink expression within expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+ SELECT 1
+ FROM ta
+ JOIN tb ON ta.id = ta1.id
+ AND ta1.val = 1
+ WHERE EXISTS (
+ SELECT 1
+ FROM ta ta2
+ WHERE ta2.id = ta1.id
+ )
+);
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta ta2
+ Filter: (val = 1)
+ -> Nested Loop
+ -> Index Only Scan using ta_pkey on ta
+ Index Cond: (id = ta2.id)
+ -> Seq Scan on tb
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+ SELECT 1
+ FROM ta
+ JOIN tb ON ta.val = ta1.id
+ AND ta1.id = 1
+ WHERE EXISTS (
+ SELECT 1
+ FROM ta ta2
+ WHERE ta2.id = ta.id
+ )
+);
+ QUERY PLAN
+-----------------------------------------------
+ Nested Loop Semi Join
+ -> Index Only Scan using ta_pkey on ta ta1
+ Index Cond: (id = 1)
+ -> Nested Loop
+ -> Seq Scan on tb
+ -> Materialize
+ -> Seq Scan on ta ta2
+ Filter: (val = 1)
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE ta.id = tb.id
+ AND EXISTS (
+ SELECT 1
+ FROM tc
+ WHERE tc.id = tb.id
+ AND tc.aid + tb.aval > 0
+ )
+);
+ QUERY PLAN
+-----------------------------------------------------
+ Hash Semi Join
+ Hash Cond: (ta.id = tc.id)
+ -> Seq Scan on ta
+ -> Hash
+ -> Hash Join
+ Hash Cond: (tb.id = tc.id)
+ Join Filter: ((tc.aid + tb.aval) > 0)
+ -> Seq Scan on tb
+ -> Hash
+ -> Seq Scan on tc
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE ta.id = tb.id
+ AND EXISTS (
+ SELECT 1
+ FROM tc
+ WHERE tc.id = tb.id
+ AND tc.aid + ta.val > 0
+ )
+);
+ QUERY PLAN
+----------------------------------------
+ Hash Join
+ Hash Cond: (ta.id = tb.id)
+ Join Filter: EXISTS(SubPlan 1)
+ -> Seq Scan on ta
+ -> Hash
+ -> Seq Scan on tb
+ SubPlan 1
+ -> Index Scan using tc_pkey on tc
+ Index Cond: (id = tb.id)
+ Filter: ((aid + ta.val) > 0)
+(10 rows)
+
+-- Check with NULL and NOT NULL expressions
+ALTER TABLE ta ADD COLUMN is_active bool;
+UPDATE ta SET is_active = true;
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND COALESCE(ta.is_active, true)
+);
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta
+ Filter: COALESCE(is_active, true)
+ -> Nested Loop
+ -> Index Only Scan using tb_pkey on tb
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+ SELECT 1
+ FROM ta
+ JOIN tc ON ta.id = tb.id
+ AND COALESCE(ta.is_active, true)
+);
+ QUERY PLAN
+-------------------------------------------------------------------
+ Hash Join
+ Hash Cond: (tb.id = ta.id)
+ -> Seq Scan on tb
+ -> Hash
+ -> HashAggregate
+ Group Key: ta.id
+ -> Nested Loop
+ -> Seq Scan on tc
+ -> Materialize
+ -> Seq Scan on ta
+ Filter: COALESCE(is_active, true)
+(11 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND CASE
+ WHEN ta.is_active THEN true
+ ELSE false
+ END = true
+);
+ QUERY PLAN
+--------------------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta
+ Filter: CASE WHEN is_active THEN true ELSE false END
+ -> Nested Loop
+ -> Index Only Scan using tb_pkey on tb
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+ SELECT 1
+ FROM ta
+ JOIN tc ON ta.id = tb.id
+ AND CASE
+ WHEN ta.is_active THEN true
+ ELSE false
+ END = true
+);
+ QUERY PLAN
+--------------------------------------------------------------------------------------
+ Hash Join
+ Hash Cond: (tb.id = ta.id)
+ -> Seq Scan on tb
+ -> Hash
+ -> HashAggregate
+ Group Key: ta.id
+ -> Nested Loop
+ -> Seq Scan on tc
+ -> Materialize
+ -> Seq Scan on ta
+ Filter: CASE WHEN is_active THEN true ELSE false END
+(11 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND ta.is_active
+);
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta
+ Filter: is_active
+ -> Nested Loop
+ -> Index Only Scan using tb_pkey on tb
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND ta.is_active IS NOT NULL
+);
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta
+ Filter: (is_active IS NOT NULL)
+ -> Nested Loop
+ -> Index Only Scan using tb_pkey on tb
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tc
+(7 rows)
+
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ RIGHT JOIN tc ON ta.id = tc.id
+);
+ QUERY PLAN
+----------------------------------------
+ Seq Scan on ta
+ Filter: EXISTS(SubPlan 1)
+ SubPlan 1
+ -> Nested Loop Left Join
+ Join Filter: (ta.id = tc.id)
+ -> Seq Scan on tc
+ -> Materialize
+ -> Seq Scan on tb
+(8 rows)
+
+-- Disable pull-up due to lack of the outer var
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON tc.id = tb.id
+);
+ QUERY PLAN
+---------------------------------------------------
+ Result
+ One-Time Filter: (InitPlan 1).col1
+ InitPlan 1
+ -> Nested Loop
+ -> Seq Scan on tb
+ -> Index Only Scan using tc_pkey on tc
+ Index Cond: (id = tb.id)
+ -> Seq Scan on ta
+(8 rows)
+
+CREATE TABLE td (id int, tc_id bytea, val int);
+INSERT INTO td
+SELECT g.id, 'Test1'::bytea AS tc_id, 6 AS val
+FROM generate_series(1, 25) AS g(id)
+UNION ALL
+SELECT g.id, 'Test2'::bytea AS tc_id, 7 AS val
+FROM generate_series(26, 50) AS g(id)
+UNION ALL
+SELECT g.id, 'Test4'::bytea AS tc_id, 6 AS val
+FROM generate_series(51, 75) AS g(id)
+UNION ALL
+SELECT g.id, 'Test5'::bytea AS tc_id, 7 AS val
+FROM generate_series(76, 100) AS g(id);
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE tb.id = ta.id AND
+ EXISTS
+ (SELECT 1
+ FROM tc
+ WHERE tc.id = tb.id)
+ );
+ QUERY PLAN
+------------------------------------
+ Hash Right Semi Join
+ Hash Cond: (tc.id = ta.id)
+ -> Hash Join
+ Hash Cond: (tb.id = tc.id)
+ -> Seq Scan on tb
+ -> Hash
+ -> Seq Scan on tc
+ -> Hash
+ -> Seq Scan on ta
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE tb.id = ta.id AND
+ EXISTS
+ (SELECT 1
+ FROM tc
+ WHERE tc.id = ta.id)
+ );
+ QUERY PLAN
+------------------------------------
+ Hash Join
+ Hash Cond: (tc.id = tb.id)
+ -> Hash Join
+ Hash Cond: (tc.id = ta.id)
+ -> Seq Scan on tc
+ -> Hash
+ -> Seq Scan on ta
+ -> Hash
+ -> Seq Scan on tb
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE tb.id = ta.id
+ AND EXISTS
+ (SELECT 1
+ FROM tc
+ WHERE tb.id = ta.id)
+ );
+ QUERY PLAN
+--------------------------------------------
+ Hash Join
+ Hash Cond: (tb.id = ta.id)
+ Join Filter: EXISTS(SubPlan 1)
+ -> Seq Scan on tb
+ -> Hash
+ -> Seq Scan on ta
+ SubPlan 1
+ -> Result
+ One-Time Filter: (tb.id = ta.id)
+ -> Seq Scan on tc
+(10 rows)
+
+explain (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ join tc on tc.id = ta.id
+ AND EXISTS (
+ SELECT 1
+ FROM td
+ WHERE td.id = ta.id)
+ );
+ QUERY PLAN
+-------------------------------------------------------
+ Hash Join
+ Hash Cond: (ta.id = td.id)
+ -> Nested Loop Semi Join
+ -> Seq Scan on ta
+ -> Nested Loop
+ -> Index Only Scan using tc_pkey on tc
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tb
+ -> Hash
+ -> HashAggregate
+ Group Key: td.id
+ -> Seq Scan on td
+(12 rows)
+
+explain (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ join tc on tc.id = ta.id
+ AND EXISTS (
+ SELECT 1
+ FROM td
+ WHERE tb.id = ta.id)
+ );
+ QUERY PLAN
+--------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta
+ -> Nested Loop
+ Join Filter: EXISTS(SubPlan 1)
+ -> Index Only Scan using tc_pkey on tc
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tb
+ SubPlan 1
+ -> Result
+ One-Time Filter: (tb.id = ta.id)
+ -> Seq Scan on td
+(11 rows)
+
+CREATE TABLE te (id int, tc_id bytea, val int);
+INSERT INTO te SELECT * FROM td;
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM ta t1
+WHERE EXISTS (
+ SELECT 1
+ FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+ JOIN tb t2 ON t2.id = t1.id
+ WHERE EXISTS (
+ SELECT 1
+ FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+ JOIN td t3 ON t3.tc_id IN ('Test1'::bytea, 'Test2'::bytea)
+ WHERE EXISTS (
+ SELECT 1
+ FROM te t4
+ WHERE t4.tc_id = t3.tc_id
+ AND t4.val = t2.aval
+ ) = EXISTS (
+ SELECT 1
+ FROM tc t5
+ WHERE t5.id = t3.id
+ )
+ )
+);
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------------------------------------------------
+ Merge Semi Join
+ Merge Cond: (t1.id = t2.id)
+ -> Index Scan using ta_pkey on ta t1
+ -> Nested Loop Semi Join
+ Join Filter: ((ANY ((t3.tc_id = (hashed SubPlan 2).col1) AND (t2.aval = (hashed SubPlan 2).col2))) = (ANY (t3.id = (hashed SubPlan 4).col1)))
+ -> Index Scan using tb_pkey on tb t2
+ -> Materialize
+ -> Seq Scan on td t3
+ Filter: (tc_id = ANY ('{"\\x5465737431","\\x5465737432"}'::bytea[]))
+ SubPlan 2
+ -> Seq Scan on te t4
+ SubPlan 4
+ -> Seq Scan on tc t5
+(13 rows)
+
+EXPLAIN
+SELECT ta.*
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON tc.id = tb.id
+ AND tb.id = ta.id
+ JOIN td ON td.id = tc.id
+);
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Hash Right Semi Join (cost=181.20..219.35 rows=1100 width=9)
+ Hash Cond: (td.id = ta.id)
+ -> Hash Join (cost=121.70..150.02 rows=1200 width=12)
+ Hash Cond: (td.id = tc.id)
+ -> Hash Join (cost=60.85..86.01 rows=1200 width=8)
+ Hash Cond: (td.id = tb.id)
+ -> Seq Scan on td (cost=0.00..22.00 rows=1200 width=4)
+ -> Hash (cost=32.60..32.60 rows=2260 width=4)
+ -> Seq Scan on tb (cost=0.00..32.60 rows=2260 width=4)
+ -> Hash (cost=32.60..32.60 rows=2260 width=4)
+ -> Seq Scan on tc (cost=0.00..32.60 rows=2260 width=4)
+ -> Hash (cost=32.00..32.00 rows=2200 width=9)
+ -> Seq Scan on ta (cost=0.00..32.00 rows=2200 width=9)
+(13 rows)
+
+DROP TABLE td, te;
+CREATE TABLE tst1 (id int, mes varchar(20));
+CREATE TABLE tst2 (id int, tst1_id int, type_id int);
+CREATE TABLE tst3 (id bytea);
+CREATE TABLE tst4 (id int, tst3_id bytea, type_id int);
+CREATE TABLE tst5 (id int, tst3_id bytea, type_id int);
+INSERT INTO tst1 VALUES (1, 'test1');
+INSERT INTO tst1 VALUES (2, 'test2');
+INSERT INTO tst1 VALUES (3, 'test3');
+INSERT INTO tst2 VALUES (1, 2, 7);
+INSERT INTO tst2 VALUES (1, 2, 6);
+INSERT INTO tst2 VALUES (2, 3, 7);
+INSERT INTO tst3
+SELECT ('Test' || g.id)::bytea AS id
+ FROM generate_series(1, 5) AS g(id);
+INSERT INTO tst4
+(SELECT g.id, 'Test1'::bytea AS tst3_id, 6 AS type_id
+ FROM generate_series(1, 25) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test2'::bytea AS tst3_id, 7 AS type_id
+ FROM generate_series(26, 50) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test4'::bytea AS tst3_id, 6 AS type_id
+ FROM generate_series(51, 75) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test5'::bytea AS tst3_id, 7 AS type_id
+ FROM generate_series(76, 100) AS g(id));
+INSERT INTO tst5 SELECT * FROM tst4;
+VACUUM (ANALYZE) tst1, tst2, tst3, tst4, tst5;
+-- Case with two exists in OpExpr, in the first one t3.id is the reference to the parent query
+-- and t2.type-id is the reference to grandparent query
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM tst1 t1
+WHERE EXISTS (
+ SELECT 1
+ FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+ JOIN tst2 t2 ON t2.tst1_id = t1.id
+ WHERE EXISTS (
+ SELECT 1
+ FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+ JOIN tst3 t3 ON t3.id IN ('Test1'::bytea, 'Test2'::bytea)
+ WHERE EXISTS (
+ SELECT 1
+ FROM tst4 t4
+ WHERE t4.tst3_id = t3.id
+ AND t4.type_id = t2.type_id
+ ) = EXISTS (
+ SELECT 1
+ FROM tst5 t5
+ WHERE t5.tst3_id = t3.id
+ )
+ )
+);
+ QUERY PLAN
+---------------------------------------------------------------------------------------
+ Hash Right Semi Join
+ Hash Cond: (t2.tst1_id = t1.id)
+ -> Nested Loop Semi Join
+ Join Filter: (EXISTS(SubPlan 1) = EXISTS(SubPlan 3))
+ -> Seq Scan on tst2 t2
+ -> Materialize
+ -> Seq Scan on tst3 t3
+ Filter: (id = ANY ('{"\\x5465737431","\\x5465737432"}'::bytea[]))
+ SubPlan 1
+ -> Seq Scan on tst4 t4
+ Filter: ((tst3_id = t3.id) AND (type_id = t2.type_id))
+ SubPlan 3
+ -> Seq Scan on tst5 t5
+ Filter: (tst3_id = t3.id)
+ -> Hash
+ -> Seq Scan on tst1 t1
+ Filter: (id IS NOT NULL)
+(17 rows)
+
+DROP TABLE tst1, tst2, tst3, tst4, tst5;
-- Test case for sublinks pushed down into subselects via join alias expansion
--
select
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index 095df0a670c..d0762c1299e 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -3177,14 +3177,12 @@ EXPLAIN (costs off) INSERT INTO rw_view1 VALUES (2, 'New row 2');
One-Time Filter: ((InitPlan 1).col1 IS NOT TRUE)
Update on base_tbl
- InitPlan 1
- -> Index Only Scan using base_tbl_pkey on base_tbl t
- Index Cond: (id = 2)
- -> Result
- One-Time Filter: (InitPlan 1).col1
+ -> Nested Loop Semi Join
-> Index Scan using base_tbl_pkey on base_tbl
Index Cond: (id = 2)
-(15 rows)
+ -> Index Scan using base_tbl_pkey on base_tbl t
+ Index Cond: (id = 2)
+(13 rows)
INSERT INTO rw_view1 VALUES (2, 'New row 2');
SELECT * FROM base_tbl;
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index a6d276a115b..ca31e47c973 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -507,6 +507,449 @@ where exists (
rollback;
--
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tc.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON 1 = 1
+ WHERE ta.id = tc.id
+);
+
+-- Join compound expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tc.id
+ AND ta.id = tb.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON tb.aval = tc.aid
+ AND tb.aval = ta1.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+JOIN tb ON true
+WHERE EXISTS (
+ SELECT 1
+ FROM tb tb1
+ JOIN tc ON ta.id = tb.id
+);
+
+-- Compound expression with const type or other type of expressions
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tc.id
+ AND ta.id = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tc.id
+ AND tb.id = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ RIGHT JOIN tc ON ta.id = tc.id
+ WHERE ta.val = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND tb.aval = ANY ('{1}'::int[])
+);
+
+-- Exists SubLink expression within expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+ SELECT 1
+ FROM ta
+ JOIN tb ON ta.id = ta1.id
+ AND ta1.val = 1
+ WHERE EXISTS (
+ SELECT 1
+ FROM ta ta2
+ WHERE ta2.id = ta1.id
+ )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+ SELECT 1
+ FROM ta
+ JOIN tb ON ta.val = ta1.id
+ AND ta1.id = 1
+ WHERE EXISTS (
+ SELECT 1
+ FROM ta ta2
+ WHERE ta2.id = ta.id
+ )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE ta.id = tb.id
+ AND EXISTS (
+ SELECT 1
+ FROM tc
+ WHERE tc.id = tb.id
+ AND tc.aid + tb.aval > 0
+ )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE ta.id = tb.id
+ AND EXISTS (
+ SELECT 1
+ FROM tc
+ WHERE tc.id = tb.id
+ AND tc.aid + ta.val > 0
+ )
+);
+
+-- Check with NULL and NOT NULL expressions
+ALTER TABLE ta ADD COLUMN is_active bool;
+UPDATE ta SET is_active = true;
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND COALESCE(ta.is_active, true)
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+ SELECT 1
+ FROM ta
+ JOIN tc ON ta.id = tb.id
+ AND COALESCE(ta.is_active, true)
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND CASE
+ WHEN ta.is_active THEN true
+ ELSE false
+ END = true
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+ SELECT 1
+ FROM ta
+ JOIN tc ON ta.id = tb.id
+ AND CASE
+ WHEN ta.is_active THEN true
+ ELSE false
+ END = true
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND ta.is_active
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND ta.is_active IS NOT NULL
+);
+
+
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ RIGHT JOIN tc ON ta.id = tc.id
+);
+
+-- Disable pull-up due to lack of the outer var
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON tc.id = tb.id
+);
+
+CREATE TABLE td (id int, tc_id bytea, val int);
+
+INSERT INTO td
+SELECT g.id, 'Test1'::bytea AS tc_id, 6 AS val
+FROM generate_series(1, 25) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test2'::bytea AS tc_id, 7 AS val
+FROM generate_series(26, 50) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test4'::bytea AS tc_id, 6 AS val
+FROM generate_series(51, 75) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test5'::bytea AS tc_id, 7 AS val
+FROM generate_series(76, 100) AS g(id);
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE tb.id = ta.id AND
+ EXISTS
+ (SELECT 1
+ FROM tc
+ WHERE tc.id = tb.id)
+ );
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE tb.id = ta.id AND
+ EXISTS
+ (SELECT 1
+ FROM tc
+ WHERE tc.id = ta.id)
+ );
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE tb.id = ta.id
+ AND EXISTS
+ (SELECT 1
+ FROM tc
+ WHERE tb.id = ta.id)
+ );
+
+explain (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ join tc on tc.id = ta.id
+ AND EXISTS (
+ SELECT 1
+ FROM td
+ WHERE td.id = ta.id)
+ );
+
+explain (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ join tc on tc.id = ta.id
+ AND EXISTS (
+ SELECT 1
+ FROM td
+ WHERE tb.id = ta.id)
+ );
+
+CREATE TABLE te (id int, tc_id bytea, val int);
+INSERT INTO te SELECT * FROM td;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM ta t1
+WHERE EXISTS (
+ SELECT 1
+ FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+ JOIN tb t2 ON t2.id = t1.id
+ WHERE EXISTS (
+ SELECT 1
+ FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+ JOIN td t3 ON t3.tc_id IN ('Test1'::bytea, 'Test2'::bytea)
+ WHERE EXISTS (
+ SELECT 1
+ FROM te t4
+ WHERE t4.tc_id = t3.tc_id
+ AND t4.val = t2.aval
+ ) = EXISTS (
+ SELECT 1
+ FROM tc t5
+ WHERE t5.id = t3.id
+ )
+ )
+);
+
+EXPLAIN
+SELECT ta.*
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON tc.id = tb.id
+ AND tb.id = ta.id
+ JOIN td ON td.id = tc.id
+);
+
+DROP TABLE td, te;
+
+CREATE TABLE tst1 (id int, mes varchar(20));
+CREATE TABLE tst2 (id int, tst1_id int, type_id int);
+CREATE TABLE tst3 (id bytea);
+CREATE TABLE tst4 (id int, tst3_id bytea, type_id int);
+CREATE TABLE tst5 (id int, tst3_id bytea, type_id int);
+
+INSERT INTO tst1 VALUES (1, 'test1');
+INSERT INTO tst1 VALUES (2, 'test2');
+INSERT INTO tst1 VALUES (3, 'test3');
+INSERT INTO tst2 VALUES (1, 2, 7);
+INSERT INTO tst2 VALUES (1, 2, 6);
+INSERT INTO tst2 VALUES (2, 3, 7);
+
+INSERT INTO tst3
+SELECT ('Test' || g.id)::bytea AS id
+ FROM generate_series(1, 5) AS g(id);
+
+INSERT INTO tst4
+(SELECT g.id, 'Test1'::bytea AS tst3_id, 6 AS type_id
+ FROM generate_series(1, 25) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test2'::bytea AS tst3_id, 7 AS type_id
+ FROM generate_series(26, 50) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test4'::bytea AS tst3_id, 6 AS type_id
+ FROM generate_series(51, 75) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test5'::bytea AS tst3_id, 7 AS type_id
+ FROM generate_series(76, 100) AS g(id));
+INSERT INTO tst5 SELECT * FROM tst4;
+
+VACUUM (ANALYZE) tst1, tst2, tst3, tst4, tst5;
+
+-- Case with two exists in OpExpr, in the first one t3.id is the reference to the parent query
+-- and t2.type-id is the reference to grandparent query
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM tst1 t1
+WHERE EXISTS (
+ SELECT 1
+ FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+ JOIN tst2 t2 ON t2.tst1_id = t1.id
+ WHERE EXISTS (
+ SELECT 1
+ FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+ JOIN tst3 t3 ON t3.id IN ('Test1'::bytea, 'Test2'::bytea)
+ WHERE EXISTS (
+ SELECT 1
+ FROM tst4 t4
+ WHERE t4.tst3_id = t3.id
+ AND t4.type_id = t2.type_id
+ ) = EXISTS (
+ SELECT 1
+ FROM tst5 t5
+ WHERE t5.tst3_id = t3.id
+ )
+ )
+);
+
+DROP TABLE tst1, tst2, tst3, tst4, tst5;
+
-- Test case for sublinks pushed down into subselects via join alias expansion
--
view thread (22+ messages) latest in thread
reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Reply to all the recipients using the --to and --cc options:
reply via email
To: [email protected]
Cc: [email protected], [email protected], [email protected], [email protected], [email protected], [email protected]
Subject: Re: pull-up subquery if JOIN-ON contains refs to upper-query
In-Reply-To: <[email protected]>
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox