public inbox for [email protected]  
help / color / mirror / Atom feed
From: Richard Guo <[email protected]>
To: Pg Hackers <[email protected]>
Subject: Convert ALL SubLinks to ANY SubLinks
Date: Thu, 26 Feb 2026 16:11:17 +0900
Message-ID: <CAMbWs48P9d8p_MftXr7ZPb86HvOLu=3ovNz5By85JvSu9epNQA@mail.gmail.com> (raw)

I was reading a paper on null-aware anti-joins (section 6 in [1]) and
noticed that they use <> ALL to represent NOT IN semantics.  We
currently execute ALL SubLinks using a nested-loop SubPlan.  This
made me realize that it could be beneficial to convert ALL SubLinks to
ANY SubLinks.

According to De Morgan's laws:

  foo op ALL (sub-SELECT) => NOT (foo negator_op ANY (sub-SELECT))
  NOT (foo op ALL (sub-SELECT)) => foo negator_op ANY (sub-SELECT)

This conversion makes it possible for the executor to evaluate the
sublink using a hashed SubPlan, which replaces a nested loop with a
more efficient hash probe.  Even better, because our planner already
knows how to pull up ANY SubLinks (and pending the other patch, NOT
ANY SubLinks), these transformed clauses have the chance to be
optimized directly into semijoins or anti-joins.

In the worst-case scenario where the transformed ANY sublink cannot be
pulled up and cannot be hashed, execution falls back to a nested-loop
SubPlan.  Performance in this scenario is effectively identical to the
legacy ALL SubPlan, so there should be no regressions.  Because the
operator is negated, the ANY subplan will short-circuit on the exact
same inner tuple that the ALL subplan would have short-circuited on.
The only added overhead is a single boolean NOT inversion per outer
tuple, which is, IMO, computationally negligible compared to the cost
of the nested-loop execution itself.

Attached is a draft patch illustrating the idea.  Any thoughts or
interest in this transformation?

[1] http://www.vldb.org/pvldb/vol2/vldb09-423.pdf

- Richard


Attachments:

  [application/octet-stream] v1-0001-Convert-ALL-SubLinks-to-ANY-SubLinks.patch (9.3K, 2-v1-0001-Convert-ALL-SubLinks-to-ANY-SubLinks.patch)
  download | inline diff:
From 4e850e37e89a6e8d217403956a043d24ec37b6f2 Mon Sep 17 00:00:00 2001
From: Richard Guo <[email protected]>
Date: Thu, 26 Feb 2026 09:51:57 +0900
Subject: [PATCH v1] Convert ALL SubLinks to ANY SubLinks

---
 src/backend/optimizer/prep/prepjointree.c | 169 +++++++++++++++++++++-
 src/test/regress/expected/subselect.out   |  28 ++++
 src/test/regress/sql/subselect.sql        |  12 ++
 3 files changed, 206 insertions(+), 3 deletions(-)

diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index c90f4b32733..008bb469d2d 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -44,6 +44,7 @@
 #include "parser/parsetree.h"
 #include "rewrite/rewriteHandler.h"
 #include "rewrite/rewriteManip.h"
+#include "utils/lsyscache.h"
 #include "utils/rel.h"
 
 
@@ -113,6 +114,7 @@ static Node *pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
 static Node *pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 										   Node **jtlink1, Relids available_rels1,
 										   Node **jtlink2, Relids available_rels2);
+static Node *negate_sublink_testexpr(Node *testexpr);
 static Node *pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode,
 										JoinExpr *lowest_outer_join,
 										AppendRelInfo *containing_appendrel);
@@ -621,7 +623,7 @@ replace_empty_jointree(Query *parse)
 
 /*
  * pull_up_sublinks
- *		Attempt to pull up ANY and EXISTS SubLinks to be treated as
+ *		Attempt to pull up ANY, EXISTS and ALL SubLinks to be treated as
  *		semijoins or anti-semijoins.
  *
  * A clause "foo op ANY (sub-SELECT)" can be processed by pulling the
@@ -639,6 +641,12 @@ replace_empty_jointree(Query *parse)
  * Under similar conditions, EXISTS and NOT EXISTS clauses can be handled
  * by pulling up the sub-SELECT and creating a semijoin or anti-semijoin.
  *
+ * A clause "foo op ALL (sub-SELECT)" can be logically rewritten into "NOT (foo
+ * negator_op ANY (sub-SELECT))".  This conversion makes it possible for the
+ * executor to evaluate the unflattened sublink using a hashed SubPlan.
+ * Furthermore, it exposes the sublink to the standard pull-up machinery,
+ * potentially flattening it into a semijoin or anti-semijoin.
+ *
  * This routine searches for such clauses and does the necessary parsetree
  * transformations if any are found.
  *
@@ -844,7 +852,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 		JoinExpr   *j;
 		Relids		child_rels;
 
-		/* Is it a convertible ANY or EXISTS clause? */
+		/* Is it a convertible ANY, EXISTS or ALL clause? */
 		if (sublink->subLinkType == ANY_SUBLINK)
 		{
 			ScalarArrayOpExpr *saop;
@@ -965,12 +973,37 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 				return NULL;
 			}
 		}
+		else if (sublink->subLinkType == ALL_SUBLINK)
+		{
+			Node	   *negated_expr = negate_sublink_testexpr(sublink->testexpr);
+
+			if (negated_expr != NULL)
+			{
+				SubLink    *any_sublink = copyObject(sublink);
+				Node	   *not_expr;
+
+				any_sublink->testexpr = negated_expr;
+				any_sublink->subLinkType = ANY_SUBLINK;
+				/* XXX should we update operName accordingly */
+
+				not_expr = (Node *) makeBoolExpr(NOT_EXPR,
+												 list_make1(any_sublink),
+												 any_sublink->location);
+
+				return pull_up_sublinks_qual_recurse(root,
+													 not_expr,
+													 jtlink1,
+													 available_rels1,
+													 jtlink2,
+													 available_rels2);
+			}
+		}
 		/* Else return it unmodified */
 		return node;
 	}
 	if (is_notclause(node))
 	{
-		/* If the immediate argument of NOT is EXISTS, try to convert */
+		/* If the immediate argument of NOT is EXISTS or ALL, try to convert */
 		SubLink    *sublink = (SubLink *) get_notclausearg((Expr *) node);
 		JoinExpr   *j;
 		Relids		child_rels;
@@ -1031,6 +1064,26 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 					return NULL;
 				}
 			}
+			else if (sublink->subLinkType == ALL_SUBLINK)
+			{
+				Node	   *negated_expr = negate_sublink_testexpr(sublink->testexpr);
+
+				if (negated_expr != NULL)
+				{
+					SubLink    *any_sublink = copyObject(sublink);
+
+					any_sublink->testexpr = negated_expr;
+					any_sublink->subLinkType = ANY_SUBLINK;
+					/* XXX should we update operName accordingly */
+
+					return pull_up_sublinks_qual_recurse(root,
+														 (Node *) any_sublink,
+														 jtlink1,
+														 available_rels1,
+														 jtlink2,
+														 available_rels2);
+				}
+			}
 		}
 		/* Else return it unmodified */
 		return node;
@@ -1067,6 +1120,116 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 	return node;
 }
 
+/*
+ * negate_sublink_testexpr
+ *		Attempt to logically negate the testexpr of an ALL_SUBLINK.
+ *
+ * This helper is used to transform ALL sublinks into ANY sublinks.  It returns
+ * a newly allocated negated expression tree, or NULL if negation is not
+ * possible.
+ */
+static Node *
+negate_sublink_testexpr(Node *testexpr)
+{
+	if (testexpr == NULL)
+		return NULL;
+	if (IsA(testexpr, OpExpr))
+	{
+		/* single-column comparison */
+		OpExpr	   *opexpr = (OpExpr *) testexpr;
+		Oid			negator = get_negator(opexpr->opno);
+
+		if (OidIsValid(negator))
+		{
+			OpExpr	   *newopexpr = copyObject(opexpr);
+
+			newopexpr->opno = negator;
+			newopexpr->opfuncid = InvalidOid;
+
+			return (Node *) newopexpr;
+		}
+	}
+	else if (is_andclause(testexpr) || is_orclause(testexpr))
+	{
+		/* multi-column equality or inequality checks */
+		BoolExpr   *bexpr = (BoolExpr *) testexpr;
+		List	   *nargs = NIL;
+		ListCell   *lc;
+
+		/*--------------------
+		 * Apply DeMorgan's Laws:
+		 *		(NOT (AND A B)) => (OR (NOT A) (NOT B))
+		 *		(NOT (OR A B))	=> (AND (NOT A) (NOT B))
+		 * i.e., swap AND for OR and negate each subclause.
+		 *--------------------
+		 */
+		foreach(lc, bexpr->args)
+		{
+			Node	   *negated_arg = negate_sublink_testexpr((Node *) lfirst(lc));
+
+			if (negated_arg == NULL)
+				return NULL;
+
+			nargs = lappend(nargs, negated_arg);
+		}
+
+		return (bexpr->boolop == AND_EXPR) ?
+			(Node *) makeBoolExpr(OR_EXPR, nargs, bexpr->location) :
+			(Node *) makeBoolExpr(AND_EXPR, nargs, bexpr->location);
+	}
+	else if (IsA(testexpr, RowCompareExpr))
+	{
+		/* multi-column ordering checks */
+		RowCompareExpr *rcexpr = (RowCompareExpr *) testexpr;
+		RowCompareExpr *newrcexpr;
+		List	   *negated_opnos = NIL;
+		CompareType negated_cmptype;
+
+		foreach_oid(opno, rcexpr->opnos)
+		{
+			Oid			negator = get_negator(opno);
+
+			if (!OidIsValid(negator))
+				return NULL;
+
+			negated_opnos = lappend_oid(negated_opnos, negator);
+		}
+
+		switch (rcexpr->cmptype)
+		{
+			case COMPARE_LT:
+				negated_cmptype = COMPARE_GE;
+				break;
+			case COMPARE_LE:
+				negated_cmptype = COMPARE_GT;
+				break;
+			case COMPARE_EQ:
+				negated_cmptype = COMPARE_NE;
+				break;
+			case COMPARE_GE:
+				negated_cmptype = COMPARE_LT;
+				break;
+			case COMPARE_GT:
+				negated_cmptype = COMPARE_LE;
+				break;
+			case COMPARE_NE:
+				negated_cmptype = COMPARE_EQ;
+				break;
+			default:
+				return NULL;
+				break;
+		}
+
+		newrcexpr = copyObject(rcexpr);
+		newrcexpr->opnos = negated_opnos;
+		newrcexpr->cmptype = negated_cmptype;
+
+		return (Node *) newrcexpr;
+	}
+
+	return NULL;
+}
+
 /*
  * preprocess_function_rtes
  *		Constant-simplify any FUNCTION RTEs in the FROM clause, and then
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 2135d82884d..654422ca952 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -3323,3 +3323,31 @@ SELECT ten FROM onek t WHERE 1.0::integer IN ((VALUES (1), (3)));
  Seq Scan on onek t
 (1 row)
 
+--
+-- Test ALL SubLink to ANY SubLink transformation
+--
+-- Ensure we get a hashed ANY-SubPlan
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 t1 WHERE two <> ALL (SELECT two FROM tenk1 t2);
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ Seq Scan on tenk1 t1
+   Filter: (NOT (ANY (two = (hashed SubPlan any_1).col1)))
+   SubPlan any_1
+     ->  Seq Scan on tenk1 t2
+(4 rows)
+
+-- Ensure we get a join
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 t1 WHERE NOT two <> ALL (SELECT two FROM tenk1 t2);
+               QUERY PLAN               
+----------------------------------------
+ Hash Join
+   Hash Cond: (t1.two = t2.two)
+   ->  Seq Scan on tenk1 t1
+   ->  Hash
+         ->  HashAggregate
+               Group Key: t2.two
+               ->  Seq Scan on tenk1 t2
+(7 rows)
+
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index cadc3293687..58be939dd20 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -1448,3 +1448,15 @@ SELECT * FROM onek t1, lateral (SELECT * FROM onek t2 WHERE t2.ten IN (values (t
 -- VtA causes the whole expression to be evaluated as a constant
 EXPLAIN (COSTS OFF)
 SELECT ten FROM onek t WHERE 1.0::integer IN ((VALUES (1), (3)));
+
+--
+-- Test ALL SubLink to ANY SubLink transformation
+--
+
+-- Ensure we get a hashed ANY-SubPlan
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 t1 WHERE two <> ALL (SELECT two FROM tenk1 t2);
+
+-- Ensure we get a join
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 t1 WHERE NOT two <> ALL (SELECT two FROM tenk1 t2);
-- 
2.39.5 (Apple Git-154)



view thread (2+ messages)  latest in thread

reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected], [email protected]
  Subject: Re: Convert ALL SubLinks to ANY SubLinks
  In-Reply-To: <CAMbWs48P9d8p_MftXr7ZPb86HvOLu=3ovNz5By85JvSu9epNQA@mail.gmail.com>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox