Re: A very quick observation of dangling pointers in Postgres pathlists

public inbox for [email protected]  
help / color / mirror / Atom feed

From: Andrei Lepikhov <[email protected]>
To: PostgreSQL Hackers <[email protected]>
To: Tom Lane <[email protected]>
Subject: Re: A very quick observation of dangling pointers in Postgres pathlists
Date: Tue, 21 Apr 2026 09:29:01 +0200
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
References: <[email protected]>

On 17/04/2026 10:56, Andrei Lepikhov wrote:
> The best-known problematic code example causing this issue is
> apply_scanjoin_target_to_paths(), and the current_rel/final_rel game from commit
> 0927d2f46dd.  Quickly fixing it, I see some more combinations have emerged:

On closer inspection, it looks like all the detected cases come from the same
issue in create_ordered_paths. The ordered_rel has the same path in its pathlist
as the input_rel. Sometimes, this path is removed from ordered_rel's pathlist
and freed, which leaves a dangling pointer in the child RelOptInfo (input_rel).

I've attached a patch that shows how to fix the issue. Some regression tests
change because of a hidden rule where a projection and its subpath have
different target lists. Right now, the patch always enforces a projection, even
if the target lists are the same. Whether there is a better way to handle this
is still open for discussion.

-- 
regards, Andrei Lepikhov,
pgEdge
From 3bbde842ad2da44acd47170b3e9949f621102d50 Mon Sep 17 00:00:00 2001
From: "Andrei V. Lepikhov" <[email protected]>
Date: Mon, 20 Apr 2026 17:25:27 +0200
Subject: [PATCH v0] Do not put one path into different pathlists

---
 src/backend/optimizer/plan/planner.c          | 34 ++++++++++++++-----
 src/test/regress/expected/limit.out           |  6 ++--
 .../regress/expected/select_distinct_on.out   | 26 +++++++-------
 src/test/regress/expected/select_parallel.out | 32 ++++++++---------
 src/test/regress/expected/tsrf.out            |  8 ++---
 5 files changed, 60 insertions(+), 46 deletions(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 56bb1d798e3..cd3250c9672 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -5462,7 +5462,20 @@ create_ordered_paths(PlannerInfo *root,
 												input_path->pathkeys, &presorted_keys);
 
 		if (is_sorted)
-			sorted_path = input_path;
+		{
+			/*
+			 * The input_path is already sorted; we would like to reuse it as
+			 * the ordered rel's path.  But we must not share the pointer with
+			 * input_rel->pathlist.  Wrap it in a fresh ProjectionPath.
+			 */
+			Path	   *wrap_target = input_path;
+
+			if (IsA(wrap_target, ProjectionPath))
+				wrap_target = ((ProjectionPath *) wrap_target)->subpath;
+
+			sorted_path = (Path *) create_projection_path(root, ordered_rel,
+														  wrap_target, target);
+		}
 		else
 		{
 			/*
@@ -5494,15 +5507,18 @@ create_ordered_paths(PlannerInfo *root,
 																	root->sort_pathkeys,
 																	presorted_keys,
 																	limit_tuples);
-		}
 
-		/*
-		 * If the pathtarget of the result path has different expressions from
-		 * the target to be applied, a projection step is needed.
-		 */
-		if (!equal(sorted_path->pathtarget->exprs, target->exprs))
-			sorted_path = apply_projection_to_path(root, ordered_rel,
-												   sorted_path, target);
+			/*
+			 * If the pathtarget of the result path has different expressions
+			 * from the target to be applied, a projection step is needed.
+			 * When is_sorted is true the wrap above already carries the
+			 * ordered rel's target, so this check is needed only in this
+			 * branch, where an explicit sort was added.
+			 */
+			if (!equal(sorted_path->pathtarget->exprs, target->exprs))
+				sorted_path = apply_projection_to_path(root, ordered_rel,
+													   sorted_path, target);
+		}
 
 		add_path(ordered_rel, sorted_path);
 	}
diff --git a/src/test/regress/expected/limit.out b/src/test/regress/expected/limit.out
index e3bcc680653..c12b2498f65 100644
--- a/src/test/regress/expected/limit.out
+++ b/src/test/regress/expected/limit.out
@@ -439,14 +439,14 @@ select currval('testseq');
 explain (verbose, costs off)
 select unique1, unique2, generate_series(1,10)
   from tenk1 order by unique2 limit 7;
-                                                                         QUERY PLAN                                                                          
--------------------------------------------------------------------------------------------------------------------------------------------------------------
+                         QUERY PLAN                         
+------------------------------------------------------------
  Limit
    Output: unique1, unique2, (generate_series(1, 10))
    ->  ProjectSet
          Output: unique1, unique2, generate_series(1, 10)
          ->  Index Scan using tenk1_unique2 on public.tenk1
-               Output: unique1, unique2, two, four, ten, twenty, hundred, thousand, twothousand, fivethous, tenthous, odd, even, stringu1, stringu2, string4
+               Output: unique1, unique2
 (6 rows)
 
 select unique1, unique2, generate_series(1,10)
diff --git a/src/test/regress/expected/select_distinct_on.out b/src/test/regress/expected/select_distinct_on.out
index 75b1e7d300f..4ae09c8b181 100644
--- a/src/test/regress/expected/select_distinct_on.out
+++ b/src/test/regress/expected/select_distinct_on.out
@@ -81,12 +81,13 @@ select distinct on (1) floor(random()) as r, f1 from int4_tbl order by 1,2;
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT ON (four) four,two
    FROM tenk1 WHERE four = 0 ORDER BY 1;
-         QUERY PLAN         
-----------------------------
- Limit
-   ->  Seq Scan on tenk1
-         Filter: (four = 0)
-(3 rows)
+            QUERY PLAN            
+----------------------------------
+ Result
+   ->  Limit
+         ->  Seq Scan on tenk1
+               Filter: (four = 0)
+(4 rows)
 
 -- and check the result of the above query is correct
 SELECT DISTINCT ON (four) four,two
@@ -114,12 +115,13 @@ SELECT DISTINCT ON (four) four,two
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT ON (four) four,hundred
    FROM tenk1 WHERE four = 0 ORDER BY 1,2;
-                  QUERY PLAN                   
------------------------------------------------
- Limit
-   ->  Index Scan using tenk1_hundred on tenk1
-         Filter: (four = 0)
-(3 rows)
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Result
+   ->  Limit
+         ->  Index Scan using tenk1_hundred on tenk1
+               Filter: (four = 0)
+(4 rows)
 
 --
 -- Test the planner's ability to reorder the distinctClause Pathkeys to match
diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out
index 933921d1860..a3d6f3d4576 100644
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@@ -753,20 +753,18 @@ end;
 $$ language plpgsql PARALLEL SAFE;
 explain (costs off, verbose)
     select ten, sp_simple_func(ten) from tenk1 where ten < 100 order by ten;
-                     QUERY PLAN                      
------------------------------------------------------
+                  QUERY PLAN                   
+-----------------------------------------------
  Gather Merge
-   Output: ten, (sp_simple_func(ten))
+   Output: ten, sp_simple_func(ten)
    Workers Planned: 4
-   ->  Result
-         Output: ten, sp_simple_func(ten)
-         ->  Sort
+   ->  Sort
+         Output: ten
+         Sort Key: tenk1.ten
+         ->  Parallel Seq Scan on public.tenk1
                Output: ten
-               Sort Key: tenk1.ten
-               ->  Parallel Seq Scan on public.tenk1
-                     Output: ten
-                     Filter: (tenk1.ten < 100)
-(11 rows)
+               Filter: (tenk1.ten < 100)
+(9 rows)
 
 drop function sp_simple_func(integer);
 -- test handling of SRFs in targetlist (bug in 10.0)
@@ -1261,18 +1259,16 @@ SELECT generate_series(1, two), array(select generate_series(1, two))
    ->  Gather Merge
          Output: tenk1.two, tenk1.tenthous
          Workers Planned: 4
-         ->  Result
-               Output: tenk1.two, tenk1.tenthous
-               ->  Sort
+         ->  Sort
+               Output: tenk1.tenthous, tenk1.two
+               Sort Key: tenk1.tenthous
+               ->  Parallel Seq Scan on public.tenk1
                      Output: tenk1.tenthous, tenk1.two
-                     Sort Key: tenk1.tenthous
-                     ->  Parallel Seq Scan on public.tenk1
-                           Output: tenk1.tenthous, tenk1.two
    SubPlan array_1
      ->  ProjectSet
            Output: generate_series(1, tenk1.two)
            ->  Result
-(16 rows)
+(14 rows)
 
 -- must disallow pushing sort below gather when pathkey contains an SRF
 EXPLAIN (VERBOSE, COSTS OFF)
diff --git a/src/test/regress/expected/tsrf.out b/src/test/regress/expected/tsrf.out
index c4f7b187f5b..a0d295859ed 100644
--- a/src/test/regress/expected/tsrf.out
+++ b/src/test/regress/expected/tsrf.out
@@ -459,12 +459,12 @@ reset enable_hashagg;
 -- case with degenerate ORDER BY
 explain (verbose, costs off)
 select 'foo' as f, generate_series(1,2) as g from few order by 1;
-                  QUERY PLAN                  
-----------------------------------------------
+                   QUERY PLAN                   
+------------------------------------------------
  ProjectSet
-   Output: 'foo'::text, generate_series(1, 2)
+   Output: ('foo'::text), generate_series(1, 2)
    ->  Seq Scan on public.few
-         Output: id, dataa, datab
+         Output: 'foo'::text
 (4 rows)
 
 select 'foo' as f, generate_series(1,2) as g from few order by 1;
-- 
2.53.0



Attachments:

  [text/plain] v0-0001-Do-not-put-one-path-into-different-pathlists.patch (8.2K, 2-v0-0001-Do-not-put-one-path-into-different-pathlists.patch)
  download | inline diff:
From 3bbde842ad2da44acd47170b3e9949f621102d50 Mon Sep 17 00:00:00 2001
From: "Andrei V. Lepikhov" <[email protected]>
Date: Mon, 20 Apr 2026 17:25:27 +0200
Subject: [PATCH v0] Do not put one path into different pathlists

---
 src/backend/optimizer/plan/planner.c          | 34 ++++++++++++++-----
 src/test/regress/expected/limit.out           |  6 ++--
 .../regress/expected/select_distinct_on.out   | 26 +++++++-------
 src/test/regress/expected/select_parallel.out | 32 ++++++++---------
 src/test/regress/expected/tsrf.out            |  8 ++---
 5 files changed, 60 insertions(+), 46 deletions(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 56bb1d798e3..cd3250c9672 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -5462,7 +5462,20 @@ create_ordered_paths(PlannerInfo *root,
 												input_path->pathkeys, &presorted_keys);
 
 		if (is_sorted)
-			sorted_path = input_path;
+		{
+			/*
+			 * The input_path is already sorted; we would like to reuse it as
+			 * the ordered rel's path.  But we must not share the pointer with
+			 * input_rel->pathlist.  Wrap it in a fresh ProjectionPath.
+			 */
+			Path	   *wrap_target = input_path;
+
+			if (IsA(wrap_target, ProjectionPath))
+				wrap_target = ((ProjectionPath *) wrap_target)->subpath;
+
+			sorted_path = (Path *) create_projection_path(root, ordered_rel,
+														  wrap_target, target);
+		}
 		else
 		{
 			/*
@@ -5494,15 +5507,18 @@ create_ordered_paths(PlannerInfo *root,
 																	root->sort_pathkeys,
 																	presorted_keys,
 																	limit_tuples);
-		}
 
-		/*
-		 * If the pathtarget of the result path has different expressions from
-		 * the target to be applied, a projection step is needed.
-		 */
-		if (!equal(sorted_path->pathtarget->exprs, target->exprs))
-			sorted_path = apply_projection_to_path(root, ordered_rel,
-												   sorted_path, target);
+			/*
+			 * If the pathtarget of the result path has different expressions
+			 * from the target to be applied, a projection step is needed.
+			 * When is_sorted is true the wrap above already carries the
+			 * ordered rel's target, so this check is needed only in this
+			 * branch, where an explicit sort was added.
+			 */
+			if (!equal(sorted_path->pathtarget->exprs, target->exprs))
+				sorted_path = apply_projection_to_path(root, ordered_rel,
+													   sorted_path, target);
+		}
 
 		add_path(ordered_rel, sorted_path);
 	}
diff --git a/src/test/regress/expected/limit.out b/src/test/regress/expected/limit.out
index e3bcc680653..c12b2498f65 100644
--- a/src/test/regress/expected/limit.out
+++ b/src/test/regress/expected/limit.out
@@ -439,14 +439,14 @@ select currval('testseq');
 explain (verbose, costs off)
 select unique1, unique2, generate_series(1,10)
   from tenk1 order by unique2 limit 7;
-                                                                         QUERY PLAN                                                                          
--------------------------------------------------------------------------------------------------------------------------------------------------------------
+                         QUERY PLAN                         
+------------------------------------------------------------
  Limit
    Output: unique1, unique2, (generate_series(1, 10))
    ->  ProjectSet
          Output: unique1, unique2, generate_series(1, 10)
          ->  Index Scan using tenk1_unique2 on public.tenk1
-               Output: unique1, unique2, two, four, ten, twenty, hundred, thousand, twothousand, fivethous, tenthous, odd, even, stringu1, stringu2, string4
+               Output: unique1, unique2
 (6 rows)
 
 select unique1, unique2, generate_series(1,10)
diff --git a/src/test/regress/expected/select_distinct_on.out b/src/test/regress/expected/select_distinct_on.out
index 75b1e7d300f..4ae09c8b181 100644
--- a/src/test/regress/expected/select_distinct_on.out
+++ b/src/test/regress/expected/select_distinct_on.out
@@ -81,12 +81,13 @@ select distinct on (1) floor(random()) as r, f1 from int4_tbl order by 1,2;
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT ON (four) four,two
    FROM tenk1 WHERE four = 0 ORDER BY 1;
-         QUERY PLAN         
-----------------------------
- Limit
-   ->  Seq Scan on tenk1
-         Filter: (four = 0)
-(3 rows)
+            QUERY PLAN            
+----------------------------------
+ Result
+   ->  Limit
+         ->  Seq Scan on tenk1
+               Filter: (four = 0)
+(4 rows)
 
 -- and check the result of the above query is correct
 SELECT DISTINCT ON (four) four,two
@@ -114,12 +115,13 @@ SELECT DISTINCT ON (four) four,two
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT ON (four) four,hundred
    FROM tenk1 WHERE four = 0 ORDER BY 1,2;
-                  QUERY PLAN                   
------------------------------------------------
- Limit
-   ->  Index Scan using tenk1_hundred on tenk1
-         Filter: (four = 0)
-(3 rows)
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Result
+   ->  Limit
+         ->  Index Scan using tenk1_hundred on tenk1
+               Filter: (four = 0)
+(4 rows)
 
 --
 -- Test the planner's ability to reorder the distinctClause Pathkeys to match
diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out
index 933921d1860..a3d6f3d4576 100644
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@@ -753,20 +753,18 @@ end;
 $$ language plpgsql PARALLEL SAFE;
 explain (costs off, verbose)
     select ten, sp_simple_func(ten) from tenk1 where ten < 100 order by ten;
-                     QUERY PLAN                      
------------------------------------------------------
+                  QUERY PLAN                   
+-----------------------------------------------
  Gather Merge
-   Output: ten, (sp_simple_func(ten))
+   Output: ten, sp_simple_func(ten)
    Workers Planned: 4
-   ->  Result
-         Output: ten, sp_simple_func(ten)
-         ->  Sort
+   ->  Sort
+         Output: ten
+         Sort Key: tenk1.ten
+         ->  Parallel Seq Scan on public.tenk1
                Output: ten
-               Sort Key: tenk1.ten
-               ->  Parallel Seq Scan on public.tenk1
-                     Output: ten
-                     Filter: (tenk1.ten < 100)
-(11 rows)
+               Filter: (tenk1.ten < 100)
+(9 rows)
 
 drop function sp_simple_func(integer);
 -- test handling of SRFs in targetlist (bug in 10.0)
@@ -1261,18 +1259,16 @@ SELECT generate_series(1, two), array(select generate_series(1, two))
    ->  Gather Merge
          Output: tenk1.two, tenk1.tenthous
          Workers Planned: 4
-         ->  Result
-               Output: tenk1.two, tenk1.tenthous
-               ->  Sort
+         ->  Sort
+               Output: tenk1.tenthous, tenk1.two
+               Sort Key: tenk1.tenthous
+               ->  Parallel Seq Scan on public.tenk1
                      Output: tenk1.tenthous, tenk1.two
-                     Sort Key: tenk1.tenthous
-                     ->  Parallel Seq Scan on public.tenk1
-                           Output: tenk1.tenthous, tenk1.two
    SubPlan array_1
      ->  ProjectSet
            Output: generate_series(1, tenk1.two)
            ->  Result
-(16 rows)
+(14 rows)
 
 -- must disallow pushing sort below gather when pathkey contains an SRF
 EXPLAIN (VERBOSE, COSTS OFF)
diff --git a/src/test/regress/expected/tsrf.out b/src/test/regress/expected/tsrf.out
index c4f7b187f5b..a0d295859ed 100644
--- a/src/test/regress/expected/tsrf.out
+++ b/src/test/regress/expected/tsrf.out
@@ -459,12 +459,12 @@ reset enable_hashagg;
 -- case with degenerate ORDER BY
 explain (verbose, costs off)
 select 'foo' as f, generate_series(1,2) as g from few order by 1;
-                  QUERY PLAN                  
-----------------------------------------------
+                   QUERY PLAN                   
+------------------------------------------------
  ProjectSet
-   Output: 'foo'::text, generate_series(1, 2)
+   Output: ('foo'::text), generate_series(1, 2)
    ->  Seq Scan on public.few
-         Output: id, dataa, datab
+         Output: 'foo'::text
 (4 rows)
 
 select 'foo' as f, generate_series(1,2) as g from few order by 1;
-- 
2.53.0

view thread (8+ messages)  latest in thread

reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected], [email protected], [email protected]
  Subject: Re: A very quick observation of dangling pointers in Postgres pathlists
  In-Reply-To: <[email protected]>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox