BUG #19355: Attempt to insert data unexpectedly during concurrent update

public inbox for [email protected]  
help / color / mirror / Atom feed

BUG #19355: Attempt to insert data unexpectedly during concurrent update
12+ messages / 7 participants
[nested] [flat]

* BUG #19355: Attempt to insert data unexpectedly during concurrent update
@ 2025-12-15 01:40  PG Bug reporting form <[email protected]>
  0 siblings, 1 reply; 12+ messages in thread

From: PG Bug reporting form @ 2025-12-15 01:40 UTC (permalink / raw)
  To: [email protected]; +Cc: [email protected]

The following bug has been logged on the website:

Bug reference:      19355
Logged by:          Bihua Wang
Email address:      [email protected]
PostgreSQL version: 18.1
Operating system:   linux
Description:        

Start two transaction and update on same tuple, raise concurrent update and
evalplanqual.  It will be found out that the session with evalplanqual did
not successfully update the data, but instead attempted to insert a row of
data incorrectly.

-- postgresql version
psql (18.1)
Type "help" for help.

postgres=# SELECT version();
                                                 version
---------------------------------------------------------------------------------------------------------
 PostgreSQL 18.1 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.5
20150623 (EulerOS 4.8.5-28), 64-bit

-- prepare relation
drop table t1,t2,t3;
create table t1(a int primary key, b int);
create table t2(a int, b int);
create table t3(a int, b int);

insert into t1 values(1,0),(2,0);
insert into t2 values(1,1),(2,2);
insert into t3 values(3,3);
-- test sql
merge /*+ nestloop(t3 t1)) */ into t1 using
        (select t2.a as a from t2 group by t2.a) as t3
        on (t1.a = t3.a)
when matched then
        update set b = t1.b + 1
when not matched then
        insert (a,b) values (1,1);

--make sure plan like this, may need to execute  "set enable_hashjoin=off"
if necessay
postgres=*#  explain merge /*+ nestloop(t3 t1)) */ into t1 using
        (select t2.a as a from t2 group by t2.a) as t3
        on (t1.a = t3.a)
when matched then
        update set b = t1.b + 1
when not matched then
        insert (a,b) values (1,1);
postgres-*# postgres-*# postgres-*# postgres-*# postgres-*# postgres-*#
QUERY PLAN
-------------------------------------------------------------------------------
 Merge on t1  (cost=38.41..124.75 rows=0 width=0)
   ->  Nested Loop Left Join  (cost=38.41..124.75 rows=200 width=34)
         ->  Subquery Scan on t3  (cost=38.25..42.25 rows=200 width=32)
               ->  HashAggregate  (cost=38.25..40.25 rows=200 width=4)
                     Group Key: t2.a
                     ->  Seq Scan on t2  (cost=0.00..32.60 rows=2260
width=4)
         ->  Index Scan using t1_pkey on t1  (cost=0.15..0.41 rows=1
width=10)
               Index Cond: (a = t3.a)
(8 rows)


-- session 1:
postgres=# begin;
BEGIN
postgres=*#  explain merge /*+ nestloop(t3 t1)) */ into t1 using
        (select t2.a as a from t2 group by t2.a) as t3
        on (t1.a = t3.a)
when matched then
        update set b = t1.b + 1
when not matched then
        insert (a,b) values (1,1);
postgres-*# postgres-*# postgres-*# postgres-*# postgres-*# postgres-*#
QUERY PLAN
-------------------------------------------------------------------------------
 Merge on t1  (cost=38.41..124.75 rows=0 width=0)
   ->  Nested Loop Left Join  (cost=38.41..124.75 rows=200 width=34)
         ->  Subquery Scan on t3  (cost=38.25..42.25 rows=200 width=32)
               ->  HashAggregate  (cost=38.25..40.25 rows=200 width=4)
                     Group Key: t2.a
                     ->  Seq Scan on t2  (cost=0.00..32.60 rows=2260
width=4)
         ->  Index Scan using t1_pkey on t1  (cost=0.15..0.41 rows=1
width=10)
               Index Cond: (a = t3.a)
(8 rows)

postgres=*# merge /*+ nestloop(t3 t1)) */ into t1 using
        (select t2.a as a from t2 group by t2.a) as t3
        on (t1.a = t3.a)
when matched then
        update set b = t1.b + 1
when not matched then
        insert (a,b) values (1,1);
postgres-*# postgres-*# postgres-*# postgres-*# postgres-*# postgres-*#
MERGE 2
postgres=*# end;
COMMIT

-- session 2
postgres=# begin;
BEGIN
postgres=*# explain merge /*+ nestloop(t3 t1)) */ into t1 using
        (select t2.a as a from t2 group by t2.a) as t3
        on (t1.a = t3.a)
when matched then
        update set b = t1.b + 1
when not matched then
        insert (a,b) values (1,1);
postgres-*# postgres-*# postgres-*# postgres-*# postgres-*# postgres-*#
QUERY PLAN
-------------------------------------------------------------------------------
 Merge on t1  (cost=38.41..124.75 rows=0 width=0)
   ->  Nested Loop Left Join  (cost=38.41..124.75 rows=200 width=34)
         ->  Subquery Scan on t3  (cost=38.25..42.25 rows=200 width=32)
               ->  HashAggregate  (cost=38.25..40.25 rows=200 width=4)
                     Group Key: t2.a
                     ->  Seq Scan on t2  (cost=0.00..32.60 rows=2260
width=4)
         ->  Index Scan using t1_pkey on t1  (cost=0.15..0.41 rows=1
width=10)
               Index Cond: (a = t3.a)
(8 rows)

postgres=*# merge /*+ nestloop(t3 t1)) */ into t1 using
        (select t2.a as a from t2 group by t2.a) as t3
        on (t1.a = t3.a)
when matched then
        update set b = t1.b + 1
when not matched then
        insert (a,b) values (1,1);
postgres-*# postgres-*# postgres-*# postgres-*# postgres-*# postgres-*#
ERROR:  duplicate key value violates unique constraint "t1_pkey"
DETAIL:  Key (a)=(1) already exists.



^ permalink  raw  reply  [nested|flat] 12+ messages in thread

* Re: BUG #19355: Attempt to insert data unexpectedly during concurrent update
@ 2025-12-15 06:25  Laurenz Albe <[email protected]>
  parent: PG Bug reporting form <[email protected]>
  0 siblings, 1 reply; 12+ messages in thread

From: Laurenz Albe @ 2025-12-15 06:25 UTC (permalink / raw)
  To: [email protected]; [email protected]

On Mon, 2025-12-15 at 01:40 +0000, PG Bug reporting form wrote:
> Start two transaction and update on same tuple, raise concurrent update and
> evalplanqual.  It will be found out that the session with evalplanqual did
> not successfully update the data, but instead attempted to insert a row of
> data incorrectly.

I'd say that is expected.

If you need a guarantee that either INSERT or UPDATE succeed, you have to use
INSERT ... ON CONFLICT ... DO UPDATE

Yours,
Laurenz Albe






^ permalink  raw  reply  [nested|flat] 12+ messages in thread

* Re: BUG #19355: Attempt to insert data unexpectedly during concurrent update
@ 2025-12-22 14:51  Bh W <[email protected]>
  parent: Laurenz Albe <[email protected]>
  0 siblings, 1 reply; 12+ messages in thread

From: Bh W @ 2025-12-22 14:51 UTC (permalink / raw)
  To: Laurenz Albe <[email protected]>; +Cc: [email protected]

The issue is that the MERGE INTO match condition is not updated.
In the MATCHED path of MERGE INTO, when the target row satisfies the match
condition and the condition itself has not changed, the system should still
be able to handle concurrent updates to the same target row by relying on
EvalPlanQual (EPQ) to refetch the latest version of the tuple, and then
proceed with the intended update.
However, in the current implementation, even though the concurrent update
does not modify any columns relevant to the ON condition, the EPQ recheck
unexpectedly results in a match condition failure, causing the update path
that should remain MATCHED to be treated as NOT MATCHED.

In the scenario shown above, if no primary key exists, an extra row will be
inserted.

Further investigation shows that execution plans using Hash Join do not
exhibit this problem.

Could you please take a look at my explanation and let me know if anything
is inaccurate? I’d also appreciate it if you could check the test scenario
I provided. Thanks a lot!

Laurenz Albe <[email protected]> 于2025年12月15日周一 14:25写道：

> On Mon, 2025-12-15 at 01:40 +0000, PG Bug reporting form wrote:
> > Start two transaction and update on same tuple, raise concurrent update
> and
> > evalplanqual.  It will be found out that the session with evalplanqual
> did
> > not successfully update the data, but instead attempted to insert a row
> of
> > data incorrectly.
>
> I'd say that is expected.
>
> If you need a guarantee that either INSERT or UPDATE succeed, you have to
> use
> INSERT ... ON CONFLICT ... DO UPDATE
>
> Yours,
> Laurenz Albe
>

^ permalink  raw  reply  [nested|flat] 12+ messages in thread

* Re: BUG #19355: Attempt to insert data unexpectedly during concurrent update
@ 2025-12-22 22:37  Dean Rasheed <[email protected]>
  parent: Bh W <[email protected]>
  0 siblings, 1 reply; 12+ messages in thread

From: Dean Rasheed @ 2025-12-22 22:37 UTC (permalink / raw)
  To: Bh W <[email protected]>; +Cc: Laurenz Albe <[email protected]>; [email protected]; Amit Langote <[email protected]>

On Mon, 22 Dec 2025 at 14:51, Bh W <[email protected]> wrote:
>
> The issue is that the MERGE INTO match condition is not updated.
> In the MATCHED path of MERGE INTO, when the target row satisfies the match condition and the condition itself has not changed, the system should still be able to handle concurrent updates to the same target row by relying on EvalPlanQual (EPQ) to refetch the latest version of the tuple, and then proceed with the intended update.
> However, in the current implementation, even though the concurrent update does not modify any columns relevant to the ON condition, the EPQ recheck unexpectedly results in a match condition failure, causing the update path that should remain MATCHED to be treated as NOT MATCHED.

I spent a little time looking at this, and managed to reduce the
reproducer test case down to this:

-- Setup
drop table if exists t1,t2;
create table t1(a int primary key, b int);
create table t2(a int, b int);

insert into t1 values(1,0),(2,0);
insert into t2 values(1,1),(2,2);

-- Session 1
begin;
update t1 set b = b+1;

-- Session 2
merge into t1 using (values(1,1),(2,2)) as t3(a,b) on (t1.a = t3.a)
when matched then
      update set b = t1.b + 1
when not matched then
      insert (a,b) values (1,1);

-- Session 1
commit;

This works fine in PG17, but fails with a PK violation in PG18.
Git-bisecting points to this commit:

cbc127917e04a978a788b8bc9d35a70244396d5b is the first bad commit
commit cbc127917e04a978a788b8bc9d35a70244396d5b
Author: Amit Langote <[email protected]>
Date:   Fri Feb 7 17:15:09 2025 +0900

    Track unpruned relids to avoid processing pruned relations

Doing a little more debugging, it looks like the problem might be this
change in InitPlan():

-           /* ignore "parent" rowmarks; they are irrelevant at runtime */
-           if (rc->isParent)
+           /*
+            * Ignore "parent" rowmarks, because they are irrelevant at
+            * runtime.  Also ignore the rowmarks belonging to child tables
+            * that have been pruned in ExecDoInitialPruning().
+            */
+           if (rc->isParent ||
+               !bms_is_member(rc->rti, estate->es_unpruned_relids))
                continue;

which seems to cause it to incorrectly skip a rowmark, which I suspect
is what is causing EvalPlanQual() to return the wrong result.

Regards,
Dean






^ permalink  raw  reply  [nested|flat] 12+ messages in thread

* Re: BUG #19355: Attempt to insert data unexpectedly during concurrent update
@ 2025-12-24 08:08  Amit Langote <[email protected]>
  parent: Dean Rasheed <[email protected]>
  0 siblings, 1 reply; 12+ messages in thread

From: Amit Langote @ 2025-12-24 08:08 UTC (permalink / raw)
  To: Dean Rasheed <[email protected]>; +Cc: Bh W <[email protected]>; Laurenz Albe <[email protected]>; [email protected]

Hi,

On Tue, Dec 23, 2025 at 4:07 Dean Rasheed <[email protected]> wrote:

> On Mon, 22 Dec 2025 at 14:51, Bh W <[email protected]> wrote:
> >
> > The issue is that the MERGE INTO match condition is not updated.
> > In the MATCHED path of MERGE INTO, when the target row satisfies the
> match condition and the condition itself has not changed, the system should
> still be able to handle concurrent updates to the same target row by
> relying on EvalPlanQual (EPQ) to refetch the latest version of the tuple,
> and then proceed with the intended update.
> > However, in the current implementation, even though the concurrent
> update does not modify any columns relevant to the ON condition, the EPQ
> recheck unexpectedly results in a match condition failure, causing the
> update path that should remain MATCHED to be treated as NOT MATCHED.
>
> I spent a little time looking at this, and managed to reduce the
> reproducer test case down to this:
>
> -- Setup
> drop table if exists t1,t2;
> create table t1(a int primary key, b int);
> create table t2(a int, b int);
>
> insert into t1 values(1,0),(2,0);
> insert into t2 values(1,1),(2,2);
>
> -- Session 1
> begin;
> update t1 set b = b+1;
>
> -- Session 2
> merge into t1 using (values(1,1),(2,2)) as t3(a,b) on (t1.a = t3.a)
> when matched then
>       update set b = t1.b + 1
> when not matched then
>       insert (a,b) values (1,1);
>
> -- Session 1
> commit;
>
> This works fine in PG17, but fails with a PK violation in PG18.
> Git-bisecting points to this commit:
>
> cbc127917e04a978a788b8bc9d35a70244396d5b is the first bad commit
> commit cbc127917e04a978a788b8bc9d35a70244396d5b
> Author: Amit Langote <[email protected]>
> Date:   Fri Feb 7 17:15:09 2025 +0900
>
>     Track unpruned relids to avoid processing pruned relations
>
> Doing a little more debugging, it looks like the problem might be this
> change in InitPlan():
>
> -           /* ignore "parent" rowmarks; they are irrelevant at runtime */
> -           if (rc->isParent)
> +           /*
> +            * Ignore "parent" rowmarks, because they are irrelevant at
> +            * runtime.  Also ignore the rowmarks belonging to child tables
> +            * that have been pruned in ExecDoInitialPruning().
> +            */
> +           if (rc->isParent ||
> +               !bms_is_member(rc->rti, estate->es_unpruned_relids))
>                 continue;
>
> which seems to cause it to incorrectly skip a rowmark, which I suspect
> is what is causing EvalPlanQual() to return the wrong result.


Thanks for the detailed analysis and adding me to the thread, Dean.

I would think that a case that involves no partitioning at all would be
untouchable by this code, but it looks like the logic I added is
incorrectly affecting cases where pruning isn’t even relevant. I’ll need to
look more carefully at why such a rowmark would exist in the rowmarks list
if its relation isn’t in es_unpruned_relids. Maybe the set population is
incorrect at some point, or perhaps it matters that the set is a copy in
the EPQ estate.

I’m afk (on vacation) at the moment, so won’t be able to dig into this
until next week.

— Amit

>


^ permalink  raw  reply  [nested|flat] 12+ messages in thread

* Re: BUG #19355: Attempt to insert data unexpectedly during concurrent update
@ 2025-12-24 12:07  Tender Wang <[email protected]>
  parent: Amit Langote <[email protected]>
  0 siblings, 1 reply; 12+ messages in thread

From: Tender Wang @ 2025-12-24 12:07 UTC (permalink / raw)
  To: Amit Langote <[email protected]>; +Cc: Dean Rasheed <[email protected]>; Bh W <[email protected]>; Laurenz Albe <[email protected]>; [email protected]

Amit Langote <[email protected]> 于2025年12月24日周三 16:08写道：

> Hi,
>
> On Tue, Dec 23, 2025 at 4:07 Dean Rasheed <[email protected]>
> wrote:
>
>> On Mon, 22 Dec 2025 at 14:51, Bh W <[email protected]> wrote:
>> >
>> > The issue is that the MERGE INTO match condition is not updated.
>> > In the MATCHED path of MERGE INTO, when the target row satisfies the
>> match condition and the condition itself has not changed, the system should
>> still be able to handle concurrent updates to the same target row by
>> relying on EvalPlanQual (EPQ) to refetch the latest version of the tuple,
>> and then proceed with the intended update.
>> > However, in the current implementation, even though the concurrent
>> update does not modify any columns relevant to the ON condition, the EPQ
>> recheck unexpectedly results in a match condition failure, causing the
>> update path that should remain MATCHED to be treated as NOT MATCHED.
>>
>> I spent a little time looking at this, and managed to reduce the
>> reproducer test case down to this:
>>
>> -- Setup
>> drop table if exists t1,t2;
>> create table t1(a int primary key, b int);
>> create table t2(a int, b int);
>>
>> insert into t1 values(1,0),(2,0);
>> insert into t2 values(1,1),(2,2);
>>
>> -- Session 1
>> begin;
>> update t1 set b = b+1;
>>
>> -- Session 2
>> merge into t1 using (values(1,1),(2,2)) as t3(a,b) on (t1.a = t3.a)
>> when matched then
>>       update set b = t1.b + 1
>> when not matched then
>>       insert (a,b) values (1,1);
>>
>> -- Session 1
>> commit;
>>
>> This works fine in PG17, but fails with a PK violation in PG18.
>> Git-bisecting points to this commit:
>>
>> cbc127917e04a978a788b8bc9d35a70244396d5b is the first bad commit
>> commit cbc127917e04a978a788b8bc9d35a70244396d5b
>> Author: Amit Langote <[email protected]>
>> Date:   Fri Feb 7 17:15:09 2025 +0900
>>
>>     Track unpruned relids to avoid processing pruned relations
>>
>> Doing a little more debugging, it looks like the problem might be this
>> change in InitPlan():
>>
>> -           /* ignore "parent" rowmarks; they are irrelevant at runtime */
>> -           if (rc->isParent)
>> +           /*
>> +            * Ignore "parent" rowmarks, because they are irrelevant at
>> +            * runtime.  Also ignore the rowmarks belonging to child
>> tables
>> +            * that have been pruned in ExecDoInitialPruning().
>> +            */
>> +           if (rc->isParent ||
>> +               !bms_is_member(rc->rti, estate->es_unpruned_relids))
>>                 continue;
>>
>> which seems to cause it to incorrectly skip a rowmark, which I suspect
>> is what is causing EvalPlanQual() to return the wrong result.
>
>
> Thanks for the detailed analysis and adding me to the thread, Dean.
>
> I would think that a case that involves no partitioning at all would be
> untouchable by this code, but it looks like the logic I added is
> incorrectly affecting cases where pruning isn’t even relevant. I’ll need to
> look more carefully at why such a rowmark would exist in the rowmarks list
> if its relation isn’t in es_unpruned_relids. Maybe the set population is
> incorrect at some point, or perhaps it matters that the set is a copy in
> the EPQ estate.
>

I did some debugging, and I found that:
In  add_rte_to_flat_rtable(),  the RTE of value was not added into
glob->AllRelids, because below codes:
.....
if (newrte->rtekind == RTE_RELATION ||
(newrte->rtekind == RTE_SUBQUERY && OidIsValid(newrte->relid)))
{
     glob->relationOids = lappend_oid(glob->relationOids, newrte->relid);
     glob->allRelids = bms_add_member(glob->allRelids,
         list_length(glob->finalrtable));
}
....

The VALUE rte was not satisfied above if, so it was not added into the
glob->allRelids.
Then in standard_planner(), we have:
....
result->unprunableRelids = bms_difference(glob->allRelids,
 glob->prunableRelids);
....
So the result->unprunableRelids contains only 1.

In InitPlan(), we have:
.....
/*
* Ignore "parent" rowmarks, because they are irrelevant at
* runtime.  Also ignore the rowmarks belonging to child tables
* that have been pruned in ExecDoInitialPruning().
*/
if (rc->isParent ||
   !bms_is_member(rc->rti, estate->es_unpruned_relids))
    continue;
.....
the estate->es_unpruned_relids equals with result->unprunableRelids
contains. So the rowMark was skipped incorrectly.

I did a quick fix as the attached patch.
Any thoughts?

-- 
Thanks,
Tender Wang


Attachments:

  [application/octet-stream] 0001-fix-concurrent-update.patch (978B, 3-0001-fix-concurrent-update.patch)
  download | inline diff:
From ed9194a5735db35c6a21c3d7e9f3a6b283cbfbf2 Mon Sep 17 00:00:00 2001
From: Tender Wang <[email protected]>
Date: Wed, 24 Dec 2025 19:57:56 +0800
Subject: [PATCH] fix concurrent update.

---
 src/backend/optimizer/plan/setrefs.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index cd7ea1e6b58..83662134d2e 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -584,6 +584,10 @@ add_rte_to_flat_rtable(PlannerGlobal *glob, List *rteperminfos,
 		(newrte->rtekind == RTE_SUBQUERY && OidIsValid(newrte->relid)))
 	{
 		glob->relationOids = lappend_oid(glob->relationOids, newrte->relid);
+	}
+	if (newrte->rtekind == RTE_RELATION || newrte->rtekind == RTE_VALUES ||
+		(newrte->rtekind == RTE_SUBQUERY && OidIsValid(newrte->relid)))
+	{
 		glob->allRelids = bms_add_member(glob->allRelids,
 										 list_length(glob->finalrtable));
 	}
-- 
2.34.1



^ permalink  raw  reply  [nested|flat] 12+ messages in thread

* Re: BUG #19355: Attempt to insert data unexpectedly during concurrent update
@ 2025-12-24 23:11  Dean Rasheed <[email protected]>
  parent: Tender Wang <[email protected]>
  0 siblings, 1 reply; 12+ messages in thread

From: Dean Rasheed @ 2025-12-24 23:11 UTC (permalink / raw)
  To: Tender Wang <[email protected]>; +Cc: Amit Langote <[email protected]>; Bh W <[email protected]>; Laurenz Albe <[email protected]>; [email protected]

On Wed, 24 Dec 2025 at 12:07, Tender Wang <[email protected]> wrote:
>
> I did some debugging, and I found that:
> In  add_rte_to_flat_rtable(),  the RTE of value was not added into glob->AllRelids, because below codes:
> .....
> the estate->es_unpruned_relids equals with result->unprunableRelids contains. So the rowMark was skipped incorrectly.
>
> I did a quick fix as the attached patch.
> Any thoughts?

Yes. However, it's not sufficient to only add RTE_VALUES RTEs to what
gets included in PlannerGlobal.allRelids. Rowmarks can be attached to
other kinds of RTEs. An obvious example is an RTE_SUBQUERY RTE that is
an actual subquery that did not come from a view. So, for this
approach to work robustly, it really should include *all* RTEs in
PlannerGlobal.allRelids.

I took a slightly different approach, which was to change the test in
InitPlan() (and also in ExecInitLockRows() and ExecInitModifyTable())
to only ignore rowmarks for pruned relations that are plain
RTE_RELATION relations, since those are the only relations that are
ever actually pruned. So rowmarks attached to any other kind of RTE
are not ignored. I also added an isolation test case.

I'm somewhat conflicted as to which approach is better. I think maybe
there is less chance of other unintended side-effects if the set of
RTEs included in PlannerGlobal.allRelids, unprunableRelids, and
es_unpruned_relids is not changed. However, as it stands,
PlannerGlobal.allRelids is misnamed (it should probably have been
called "relationRelids", in line with the "relationOids" field).
Making it actually include all RTEs would solve that.

Regards,
Dean


Attachments:

  [text/x-patch] fix-rowmark-skipping-for-pruned-relations.patch (5.6K, 2-fix-rowmark-skipping-for-pruned-relations.patch)
  download | inline diff:
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
new file mode 100644
index 797d8b1..01dffde
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -880,21 +880,25 @@ InitPlan(QueryDesc *queryDesc, int eflag
 		foreach(l, plannedstmt->rowMarks)
 		{
 			PlanRowMark *rc = (PlanRowMark *) lfirst(l);
+			RangeTblEntry *rte = exec_rt_fetch(rc->rti, estate);
 			Oid			relid;
 			Relation	relation;
 			ExecRowMark *erm;
 
+			/* ignore "parent" rowmarks; they are irrelevant at runtime */
+			if (rc->isParent)
+				continue;
+
 			/*
-			 * Ignore "parent" rowmarks, because they are irrelevant at
-			 * runtime.  Also ignore the rowmarks belonging to child tables
-			 * that have been pruned in ExecDoInitialPruning().
+			 * Also ignore rowmarks belonging to child tables that have been
+			 * pruned in ExecDoInitialPruning().
 			 */
-			if (rc->isParent ||
+			if (rte->rtekind == RTE_RELATION &&
 				!bms_is_member(rc->rti, estate->es_unpruned_relids))
 				continue;
 
 			/* get relation's OID (will produce InvalidOid if subquery) */
-			relid = exec_rt_fetch(rc->rti, estate)->relid;
+			relid = rte->relid;
 
 			/* open relation, if we need to access it for this mark type */
 			switch (rc->markType)
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
new file mode 100644
index a8afbf9..5d38bd4
--- a/src/backend/executor/nodeLockRows.c
+++ b/src/backend/executor/nodeLockRows.c
@@ -344,15 +344,19 @@ ExecInitLockRows(LockRows *node, EState
 	foreach(lc, node->rowMarks)
 	{
 		PlanRowMark *rc = lfirst_node(PlanRowMark, lc);
+		RangeTblEntry *rte = exec_rt_fetch(rc->rti, estate);
 		ExecRowMark *erm;
 		ExecAuxRowMark *aerm;
 
+		/* ignore "parent" rowmarks; they are irrelevant at runtime */
+		if (rc->isParent)
+			continue;
+
 		/*
-		 * Ignore "parent" rowmarks, because they are irrelevant at runtime.
-		 * Also ignore the rowmarks belonging to child tables that have been
+		 * Also ignore rowmarks belonging to child tables that have been
 		 * pruned in ExecDoInitialPruning().
 		 */
-		if (rc->isParent ||
+		if (rte->rtekind == RTE_RELATION &&
 			!bms_is_member(rc->rti, estate->es_unpruned_relids))
 			continue;
 
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
new file mode 100644
index 874b71e..9066a58
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -5092,15 +5092,19 @@ ExecInitModifyTable(ModifyTable *node, E
 	foreach(l, node->rowMarks)
 	{
 		PlanRowMark *rc = lfirst_node(PlanRowMark, l);
+		RangeTblEntry *rte = exec_rt_fetch(rc->rti, estate);
 		ExecRowMark *erm;
 		ExecAuxRowMark *aerm;
 
+		/* ignore "parent" rowmarks; they are irrelevant at runtime */
+		if (rc->isParent)
+			continue;
+
 		/*
-		 * Ignore "parent" rowmarks, because they are irrelevant at runtime.
-		 * Also ignore the rowmarks belonging to child tables that have been
+		 * Also ignore rowmarks belonging to child tables that have been
 		 * pruned in ExecDoInitialPruning().
 		 */
-		if (rc->isParent ||
+		if (rte->rtekind == RTE_RELATION &&
 			!bms_is_member(rc->rti, estate->es_unpruned_relids))
 			continue;
 
diff --git a/src/test/isolation/expected/eval-plan-qual.out b/src/test/isolation/expected/eval-plan-qual.out
new file mode 100644
index 05fffe0..a122047
--- a/src/test/isolation/expected/eval-plan-qual.out
+++ b/src/test/isolation/expected/eval-plan-qual.out
@@ -1473,3 +1473,23 @@ step s2pp4: DELETE FROM another_parttbl
 step c1: COMMIT;
 step s2pp4: <... completed>
 step c2: COMMIT;
+
+starting permutation: updateformergevalues mergevalues c1 c2 read
+step updateformergevalues: UPDATE accounts SET balance = balance + 100;
+step mergevalues: 
+	MERGE INTO accounts
+	USING (VALUES ('checking', 610), ('savings', 620)) v(accountid, balance)
+	ON v.accountid = accounts.accountid
+	WHEN MATCHED THEN UPDATE SET balance = v.balance
+	WHEN NOT MATCHED THEN INSERT VALUES ('unmatched', -1);
+ <waiting ...>
+step c1: COMMIT;
+step mergevalues: <... completed>
+step c2: COMMIT;
+step read: SELECT * FROM accounts ORDER BY accountid;
+accountid|balance|balance2
+---------+-------+--------
+checking |    610|    1220
+savings  |    620|    1240
+(2 rows)
+
diff --git a/src/test/isolation/specs/eval-plan-qual.spec b/src/test/isolation/specs/eval-plan-qual.spec
new file mode 100644
index 80e1e6b..fb57fb2
--- a/src/test/isolation/specs/eval-plan-qual.spec
+++ b/src/test/isolation/specs/eval-plan-qual.spec
@@ -206,6 +206,8 @@ step sys1	{
 
 step s1pp1 { UPDATE another_parttbl SET b = b + 1 WHERE a = 1; }
 
+step updateformergevalues { UPDATE accounts SET balance = balance + 100; }
+
 session s2
 setup		{ BEGIN ISOLATION LEVEL READ COMMITTED; }
 step wx2	{ UPDATE accounts SET balance = balance + 450 WHERE accountid = 'checking' RETURNING balance; }
@@ -318,6 +320,14 @@ step s2pp2 { PREPARE epd AS DELETE FROM
 step s2pp3 { EXECUTE epd(1); }
 step s2pp4 { DELETE FROM another_parttbl WHERE a = (SELECT 1); }
 
+step mergevalues {
+	MERGE INTO accounts
+	USING (VALUES ('checking', 610), ('savings', 620)) v(accountid, balance)
+	ON v.accountid = accounts.accountid
+	WHEN MATCHED THEN UPDATE SET balance = v.balance
+	WHEN NOT MATCHED THEN INSERT VALUES ('unmatched', -1);
+}
+
 session s3
 setup		{ BEGIN ISOLATION LEVEL READ COMMITTED; }
 step read	{ SELECT * FROM accounts ORDER BY accountid; }
@@ -425,3 +435,6 @@ permutation sys1 sysmerge2 c1 c2
 # Exercise run-time partition pruning code in an EPQ recheck
 permutation s1pp1 s2pp1 s2pp2 s2pp3 c1 c2
 permutation s1pp1 s2pp4 c1 c2
+
+# test EPQ recheck in MERGE from VALUES_RTE, cf bug #19355
+permutation updateformergevalues mergevalues c1 c2 read


^ permalink  raw  reply  [nested|flat] 12+ messages in thread

* Re: BUG #19355: Attempt to insert data unexpectedly during concurrent update
@ 2025-12-25 04:48  Tender Wang <[email protected]>
  parent: Dean Rasheed <[email protected]>
  0 siblings, 1 reply; 12+ messages in thread

From: Tender Wang @ 2025-12-25 04:48 UTC (permalink / raw)
  To: Dean Rasheed <[email protected]>; +Cc: Amit Langote <[email protected]>; Bh W <[email protected]>; Laurenz Albe <[email protected]>; [email protected]

Dean Rasheed <[email protected]> 于2025年12月25日周四 07:12写道：

> On Wed, 24 Dec 2025 at 12:07, Tender Wang <[email protected]> wrote:
> >
> > I did some debugging, and I found that:
> > In  add_rte_to_flat_rtable(),  the RTE of value was not added into
> glob->AllRelids, because below codes:
> > .....
> > the estate->es_unpruned_relids equals with result->unprunableRelids
> contains. So the rowMark was skipped incorrectly.
> >
> > I did a quick fix as the attached patch.
> > Any thoughts?
>
> Yes. However, it's not sufficient to only add RTE_VALUES RTEs to what
> gets included in PlannerGlobal.allRelids. Rowmarks can be attached to
> other kinds of RTEs. An obvious example is an RTE_SUBQUERY RTE that is
> an actual subquery that did not come from a view. So, for this
> approach to work robustly, it really should include *all* RTEs in
> PlannerGlobal.allRelids.
>

Yes,  it is. I can reproduce the same with a subquery as below:

merge into t1 using (select a from (values(1,1),(2,2) as t4(a,b)) as t3(a)
on (t1.a = t3.a)
when matched then
      update set b = t1.b + 1
when not matched then
      insert (a,b) values (1,1);
or
merge into t1 using (select generate_series(1,2) as a) as t3  on (t1.a =
t3.a)
when matched then
      update set b = t1.b + 1
when not matched then
      insert (a,b) values (1,1);

All reported the same error. My approach obviously can not cover all cases.

>
> I took a slightly different approach, which was to change the test in
> InitPlan() (and also in ExecInitLockRows() and ExecInitModifyTable())
> to only ignore rowmarks for pruned relations that are plain
> RTE_RELATION relations, since those are the only relations that are
> ever actually pruned. So rowmarks attached to any other kind of RTE
> are not ignored. I also added an isolation test case.
>

I failed to apply your patch, some errors, like "error: git apply: bad
git-diff - expected /dev/null on line 2"
or "Patch format detection failed."
 I looked through the patch. It worked for me.


> I'm somewhat conflicted as to which approach is better. I think maybe
> there is less chance of other unintended side-effects if the set of
> RTEs included in PlannerGlobal.allRelids, unprunableRelids, and
> es_unpruned_relids is not changed. However, as it stands,
> PlannerGlobal.allRelids is misnamed (it should probably have been
> called "relationRelids", in line with the "relationOids" field).
> Making it actually include all RTEs would solve that.
>
.....
 /*
* RT indexes of all relation RTEs in finalrtable (RTE_RELATION and
* RTE_SUBQUERY RTEs of views)
*/
Bitmapset  *allRelids;
......
Per the comment, I guess Amit didn't plan to include all RTEs in his design.


-- 
Thanks,
Tender Wang


^ permalink  raw  reply  [nested|flat] 12+ messages in thread

* Re: BUG #19355: Attempt to insert data unexpectedly during concurrent update
@ 2025-12-31 16:16  Bernice Southey <[email protected]>
  parent: Tender Wang <[email protected]>
  0 siblings, 1 reply; 12+ messages in thread

From: Bernice Southey @ 2025-12-31 16:16 UTC (permalink / raw)
  To: Tender Wang <[email protected]>; +Cc: Dean Rasheed <[email protected]>; Amit Langote <[email protected]>; Bh W <[email protected]>; Laurenz Albe <[email protected]>; [email protected]

Hi,

I've spent some time looking at the code and history of this bug
because I also ran into it [1] and I was deeply curious why concurrent
updates worked with a hash join + seq scan but not a nested loop +
index scan.

I was also curious if this bug is why Yuri didn't see deadlocks in
this related bug [2].

FWIW, I found a simpler way to reproduce this:

create table t(id int primary key, count int);
insert into t values (1, 0), (2, 0);

--session 1
begin;
update t set count = count + 1;

--session 2
set enable_seqscan = off;
update t set count = count + 1 from (values (1), (2)) v(id) where t.id = v.id;

Only one update succeeds in session 2.

Tender Wang wrote:
> Dean Rasheed wrote:
>> I'm somewhat conflicted as to which approach is better. I think maybe
>> there is less chance of other unintended side-effects if the set of
>> RTEs included in PlannerGlobal.allRelids, unprunableRelids, and
>> es_unpruned_relids is not changed. However, as it stands,
>> PlannerGlobal.allRelids is misnamed (it should probably have been
>> called "relationRelids", in line with the "relationOids" field).
>> Making it actually include all RTEs would solve that.

> Per the comment, I guess Amit didn't plan to include all RTEs in his design.

While digging, I noticed that allRelids seems only to be used to
create unprunableRelids. But unprunableRelids is used among other
things, to create es_unpruned_relids and as an unpruned_relids
parameter to ExecInitRangeTable. It's difficult to follow all the
paths where the missing relids will matter. So I think Tender's
approach is safer if amended to include all relids. I'm very new to
postgres source code, so this is a very tentative opinion. Amit can
best answer after his holiday.

I was curious enough to test in the meantime. I made an experimental
patch that just adds all rels to allRelids. This fixed all the issues
I'm aware of. It caused overexplain tests to fail by adding in new
non-relation relIds as expected. Apologies if I'm getting the
terminology wrong.

I ran Yuri's reproducer on my patched branch and indeed got several deadlocks.
psql:crash.sql:3: ERROR:  deadlock detected
DETAIL:  Process 71673 waits for ShareLock on transaction 2766;
blocked by process 71643.
Process 71643 waits for ShareLock on transaction 2777; blocked by process 71673.

Curiously, in the test I added (based on Dean's), enable_indexscan or
enable_seqscan didn't make any difference to the plan. But when I run
the reproducer manually, it does change the plan. Since the bug
disappears with a hash join + seq scan, the plan is critical. So I
included 'set enable_seqscan to 0' for reference and future proofing.

I didn't attempt to fix the comment because I don't understand it. I
also didn't check all my tabs vs spaces, or run TAP tests, as this
patch is just meant for info. Most of the code is still way over my
head, so my apologies if none of this is helpful.

As to why this works for a hash join but not a nested loop, I have
some ideas, but I'm still digging.

Happy New Year!
Bernice

[1] https://www.postgresql.org/message-id/flat/CAMbWs4-uC8DWRZvByHej%2B9E5em7V_%2BzNyRfFx-UUpR4HwifK4w%4...
[2] https://www.postgresql.org/message-id/ly6rss443iajiv5cfypzu6fgrmxdbwzdtuujkddr4eis7s2gcq%40gcdytiget...


Attachments:

  [text/x-patch] concurrent-update-experiment.patch (5.4K, 2-concurrent-update-experiment.patch)
  download | inline diff:
From 54526773321a525a0e806bf17353537a4bc23fef Mon Sep 17 00:00:00 2001
From: Bernice Southey <[email protected]>
Date: Wed, 31 Dec 2025 14:26:39 +0000
Subject: [PATCH] concurrent update experiment

---
 .../pg_overexplain/expected/pg_overexplain.out  |  9 +++++----
 src/backend/optimizer/plan/setrefs.c            |  5 +++--
 src/test/isolation/expected/eval-plan-qual.out  | 17 +++++++++++++++++
 src/test/isolation/specs/eval-plan-qual.spec    | 10 ++++++++++
 4 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/contrib/pg_overexplain/expected/pg_overexplain.out b/contrib/pg_overexplain/expected/pg_overexplain.out
index 55d34666d87..45a71af5776 100644
--- a/contrib/pg_overexplain/expected/pg_overexplain.out
+++ b/contrib/pg_overexplain/expected/pg_overexplain.out
@@ -47,7 +47,8 @@ EXPLAIN (RANGE_TABLE) SELECT 1;
    RTIs: 1
  RTI 1 (result):
    Eref: "*RESULT*" ()
-(4 rows)
+ Unprunable RTIs: 1
+(5 rows)
 
 -- Create a partitioned table.
 CREATE TABLE vegetables (id serial, name text, genus text)
@@ -141,7 +142,7 @@ $$);
    Relation: daucus
    Relation Kind: relation
    Relation Lock Mode: AccessShareLock
- Unprunable RTIs: 1 3 4
+ Unprunable RTIs: 1 2 3 4
 (53 rows)
 
 -- Test a different output format.
@@ -292,7 +293,7 @@ $$);
          <Security-Barrier>false</Security-Barrier>                 +
          <Lateral>false</Lateral>                                   +
        </Range-Table-Entry>                                         +
-       <Unprunable-RTIs>1 3 4</Unprunable-RTIs>                     +
+       <Unprunable-RTIs>1 2 3 4</Unprunable-RTIs>                   +
        <Result-RTIs>none</Result-RTIs>                              +
      </Range-Table>                                                 +
    </Query>                                                         +
@@ -485,7 +486,7 @@ INSERT INTO vegetables (name, genus) VALUES ('broccoflower', 'brassica');
    Permission Info Index: 1
  RTI 2 (result):
    Eref: "*RESULT*" ()
- Unprunable RTIs: 1
+ Unprunable RTIs: 1 2
  Result RTIs: 1
 (15 rows)
 
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index cd7ea1e6b58..4bfcbf4efb5 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -580,12 +580,13 @@ add_rte_to_flat_rtable(PlannerGlobal *glob, List *rteperminfos,
 	 * Note we don't bother to avoid making duplicate list entries.  We could,
 	 * but it would probably cost more cycles than it would save.
 	 */
+	glob->allRelids = bms_add_member(glob->allRelids,
+										 list_length(glob->finalrtable));	
+	
 	if (newrte->rtekind == RTE_RELATION ||
 		(newrte->rtekind == RTE_SUBQUERY && OidIsValid(newrte->relid)))
 	{
 		glob->relationOids = lappend_oid(glob->relationOids, newrte->relid);
-		glob->allRelids = bms_add_member(glob->allRelids,
-										 list_length(glob->finalrtable));
 	}
 
 	/*
diff --git a/src/test/isolation/expected/eval-plan-qual.out b/src/test/isolation/expected/eval-plan-qual.out
index 05fffe0d570..d182bbe2722 100644
--- a/src/test/isolation/expected/eval-plan-qual.out
+++ b/src/test/isolation/expected/eval-plan-qual.out
@@ -1432,6 +1432,23 @@ a|b|c|   d
 (2 rows)
 
 
+starting permutation: updateforvalues updatevalues c1 c2 read
+step updateforvalues: UPDATE accounts SET balance = balance + 100;
+step updatevalues: 
+	set local enable_seqscan to 0;
+	UPDATE accounts SET balance = v.balance FROM (VALUES('checking', 610), ('savings', 620)) v(accountid, balance) WHERE accounts.accountid = v.accountid;
+ <waiting ...>
+step c1: COMMIT;
+step updatevalues: <... completed>
+step c2: COMMIT;
+step read: SELECT * FROM accounts ORDER BY accountid;
+accountid|balance|balance2
+---------+-------+--------
+checking |    610|    1220
+savings  |    620|    1240
+(2 rows)
+
+
 starting permutation: sys1 sysupd2 c1 c2
 step sys1: 
 	UPDATE pg_class SET reltuples = 123 WHERE oid = 'accounts'::regclass;
diff --git a/src/test/isolation/specs/eval-plan-qual.spec b/src/test/isolation/specs/eval-plan-qual.spec
index 80e1e6bb307..4870690ac5b 100644
--- a/src/test/isolation/specs/eval-plan-qual.spec
+++ b/src/test/isolation/specs/eval-plan-qual.spec
@@ -206,6 +206,8 @@ step sys1	{
 
 step s1pp1 { UPDATE another_parttbl SET b = b + 1 WHERE a = 1; }
 
+step updateforvalues { UPDATE accounts SET balance = balance + 100; }
+
 session s2
 setup		{ BEGIN ISOLATION LEVEL READ COMMITTED; }
 step wx2	{ UPDATE accounts SET balance = balance + 450 WHERE accountid = 'checking' RETURNING balance; }
@@ -318,6 +320,11 @@ step s2pp2 { PREPARE epd AS DELETE FROM another_parttbl WHERE a = $1; }
 step s2pp3 { EXECUTE epd(1); }
 step s2pp4 { DELETE FROM another_parttbl WHERE a = (SELECT 1); }
 
+step updatevalues { 
+	set local enable_seqscan to 0;
+	UPDATE accounts SET balance = v.balance FROM (VALUES('checking', 610), ('savings', 620)) v(accountid, balance) WHERE accounts.accountid = v.accountid;
+} 
+
 session s3
 setup		{ BEGIN ISOLATION LEVEL READ COMMITTED; }
 step read	{ SELECT * FROM accounts ORDER BY accountid; }
@@ -419,6 +426,9 @@ permutation simplepartupdate_route1to2 complexpartupdate_route_err1 c1 c2 read_p
 permutation simplepartupdate_noroute complexpartupdate_route c1 c2 read_part
 permutation simplepartupdate_noroute complexpartupdate_doesnt_route c1 c2 read_part
 
+# test EPQ recheck in UPDATE from VALUES_RTE, cf bug #19355
+permutation updateforvalues updatevalues c1 c2 read
+
 permutation sys1 sysupd2 c1 c2
 permutation sys1 sysmerge2 c1 c2
 
-- 
2.43.0



^ permalink  raw  reply  [nested|flat] 12+ messages in thread

* Re: BUG #19355: Attempt to insert data unexpectedly during concurrent update
@ 2026-01-01 21:58  Bernice Southey <[email protected]>
  parent: Bernice Southey <[email protected]>
  0 siblings, 1 reply; 12+ messages in thread

From: Bernice Southey @ 2026-01-01 21:58 UTC (permalink / raw)
  To: Tender Wang <[email protected]>; +Cc: Dean Rasheed <[email protected]>; Amit Langote <[email protected]>; Bh W <[email protected]>; Laurenz Albe <[email protected]>; [email protected]

> > Dean Rasheed wrote:
> >> I'm somewhat conflicted as to which approach is better. I think maybe
> >> there is less chance of other unintended side-effects if the set of
> >> RTEs included in PlannerGlobal.allRelids, unprunableRelids, and
> >> es_unpruned_relids is not changed. However, as it stands,
> >> PlannerGlobal.allRelids is misnamed (it should probably have been
> >> called "relationRelids", in line with the "relationOids" field).
> >> Making it actually include all RTEs would solve that.

I did some more digging (postgres source code is addictive).

Up until v56-0004-Defer-locking-of-runtime-prunable-relations-to-e.patch
[1], the existing unfiltered rtable is used to create
unprunableRelids.

+++ b/src/backend/optimizer/plan/planner.c
@@ -555,6 +555,8 @@ standard_planner(Query *parse, const char
*query_string, int cursorOptions,
  result->planTree = top_plan;
  result->partPruneInfos = glob->partPruneInfos;
  result->rtable = glob->finalrtable;
+ result->unprunableRelids = bms_difference(bms_add_range(NULL, 1,
list_length(result->rtable)),
+   glob->prunableRelids);

In v56 [2], the filtered allRelids was added. I think this is when the
bug was introduced, because the three places from Dean's patch are in
both versions.

I've looked much harder at the code (I'm still at
stumbling-around-in-the-dark-with-a-match level) and AFAICT, the two
approaches are very similar. I think equal effort is required to check
that PlannerGlobal.allRelids, unprunableRelids, and es_unpruned_relids
are correct, whichever approach is used. I can't find any missed cases
in either approach - with my matchlight.

Sorry for my ignorance: does a relId refer to a range table index and
a relation to a...for lack of a better word...table+?
I can really see these much appreciated extra features and
enhancements don't come cheap.

Thanks,
Bernice

[1] https://www.postgresql.org/message-id/CA%2BHiwqE7%2BiwMH4NYtFi28Pt9fT_gRW%2BGt-%3DCvOX%3DPkquo%3DAN8...
[2] https://www.postgresql.org/message-id/CA%2BHiwqHRRFQN6yZ54fBydOTM6ncqZBCmewZ6n519RjRdDsO44g%40mail.g...

^ permalink  raw  reply  [nested|flat] 12+ messages in thread

* Re: BUG #19355: Attempt to insert data unexpectedly during concurrent update
@ 2026-01-07 09:45  Amit Langote <[email protected]>
  parent: Bernice Southey <[email protected]>
  0 siblings, 1 reply; 12+ messages in thread

From: Amit Langote @ 2026-01-07 09:45 UTC (permalink / raw)
  To: Bernice Southey <[email protected]>; +Cc: Tender Wang <[email protected]>; Dean Rasheed <[email protected]>; Bh W <[email protected]>; Laurenz Albe <[email protected]>; [email protected]

Hi Bernice,

On Fri, Jan 2, 2026 at 6:58 AM Bernice Southey
<[email protected]> wrote:
> > > Dean Rasheed wrote:
> > >> I'm somewhat conflicted as to which approach is better. I think maybe
> > >> there is less chance of other unintended side-effects if the set of
> > >> RTEs included in PlannerGlobal.allRelids, unprunableRelids, and
> > >> es_unpruned_relids is not changed. However, as it stands,
> > >> PlannerGlobal.allRelids is misnamed (it should probably have been
> > >> called "relationRelids", in line with the "relationOids" field).
> > >> Making it actually include all RTEs would solve that.
>
> I did some more digging (postgres source code is addictive).
>
> Up until v56-0004-Defer-locking-of-runtime-prunable-relations-to-e.patch
> [1], the existing unfiltered rtable is used to create
> unprunableRelids.
>
> +++ b/src/backend/optimizer/plan/planner.c
> @@ -555,6 +555,8 @@ standard_planner(Query *parse, const char
> *query_string, int cursorOptions,
>   result->planTree = top_plan;
>   result->partPruneInfos = glob->partPruneInfos;
>   result->rtable = glob->finalrtable;
> + result->unprunableRelids = bms_difference(bms_add_range(NULL, 1,
> list_length(result->rtable)),
> +   glob->prunableRelids);
>
> In v56 [2], the filtered allRelids was added. I think this is when the
> bug was introduced, because the three places from Dean's patch are in
> both versions.
>
> I've looked much harder at the code (I'm still at
> stumbling-around-in-the-dark-with-a-match level) and AFAICT, the two
> approaches are very similar. I think equal effort is required to check
> that PlannerGlobal.allRelids, unprunableRelids, and es_unpruned_relids
> are correct, whichever approach is used. I can't find any missed cases
> in either approach - with my matchlight.

Good catch on the history -- that's exactly when the bug was
introduced. I'd say Dean's approach is easier to verify since it's a
simple check at the point of use, rather than ensuring allRelids is
built correctly across all planner code paths.

> Sorry for my ignorance: does a relId refer to a range table index and
> a relation to a...for lack of a better word...table+?

Depends on the context, but "relid" in the planner internal data
structures refer to RT index.  In the planner output data structures
(plan nodes, etc.), we typically use "rti" or "rtindex".

-- 
Thanks, Amit Langote






^ permalink  raw  reply  [nested|flat] 12+ messages in thread

* Re: BUG #19355: Attempt to insert data unexpectedly during concurrent update
@ 2026-01-07 12:16  Amit Langote <[email protected]>
  parent: Amit Langote <[email protected]>
  0 siblings, 0 replies; 12+ messages in thread

From: Amit Langote @ 2026-01-07 12:16 UTC (permalink / raw)
  To: Bernice Southey <[email protected]>; +Cc: Tender Wang <[email protected]>; Dean Rasheed <[email protected]>; Bh W <[email protected]>; Laurenz Albe <[email protected]>; [email protected]

On Wed, Jan 7, 2026 at 6:45 PM Amit Langote <[email protected]> wrote:
> On Fri, Jan 2, 2026 at 6:58 AM Bernice Southey
> <[email protected]> wrote:
> > I've looked much harder at the code (I'm still at
> > stumbling-around-in-the-dark-with-a-match level) and AFAICT, the two
> > approaches are very similar. I think equal effort is required to check
> > that PlannerGlobal.allRelids, unprunableRelids, and es_unpruned_relids
> > are correct, whichever approach is used. I can't find any missed cases
> > in either approach - with my matchlight.
>
> Good catch on the history -- that's exactly when the bug was
> introduced.

I remembered more on why I insisted on putting only relations with OID
(RTE_RELATION and RTE_SUBQUERY with relid set (views)) into
PlannerGlobal.allRelids and thus PlannedStmt.unprunableRelids. The
reason is that I did not want loops iterating over unprunableRelids to
have to skip relations that do not have an OID. For example,
AcquireExecutorLocks() currently iterates over the rtable and skips
non-RELATION RTEs; in a now-reverted commit (525392d57) I had changed
it to iterate over unprunableRelids instead, which avoided the need
for that skip logic entirely, because unprunableRelids by design only
contains OID-bearing relations:

-       foreach(lc2, plannedstmt->rtable)
+       while ((rtindex = bms_next_member(plannedstmt->unprunableRelids,
+                                         rtindex)) >= 0)
        {
-           RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc2);
+           RangeTblEntry *rte = list_nth_node(RangeTblEntry,
+                                              plannedstmt->rtable,
+                                              rtindex - 1);

-           if (!(rte->rtekind == RTE_RELATION ||
-                 (rte->rtekind == RTE_SUBQUERY && OidIsValid(rte->relid))))
-               continue;
+           Assert(rte->rtekind == RTE_RELATION ||
+                  (rte->rtekind == RTE_SUBQUERY && OidIsValid(rte->relid)));

This is why adding all RTEs to allRelids, as Tender suggested, would
be problematic -- it would defeat the design intent that allows loops
over unprunableRelids to skip the filtering logic. While there are no
such loops today (I reverted the patch that added one), keeping this
design makes sense. Dean's fix addresses the buggy bms_is_member()
check directly, without changing allRelids - albeit the name's a bit
misleading as has been mentioned.

--
Thanks, Amit Langote

^ permalink  raw  reply  [nested|flat] 12+ messages in thread

end of thread, other threads:[~2026-01-07 12:16 UTC | newest]

Thread overview: 12+ messages (download: mbox mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2025-12-15 01:40 BUG #19355: Attempt to insert data unexpectedly during concurrent update PG Bug reporting form <[email protected]>
2025-12-15 06:25 ` Laurenz Albe <[email protected]>
2025-12-22 14:51   ` Bh W <[email protected]>
2025-12-22 22:37     ` Dean Rasheed <[email protected]>
2025-12-24 08:08       ` Amit Langote <[email protected]>
2025-12-24 12:07         ` Tender Wang <[email protected]>
2025-12-24 23:11           ` Dean Rasheed <[email protected]>
2025-12-25 04:48             ` Tender Wang <[email protected]>
2025-12-31 16:16               ` Bernice Southey <[email protected]>
2026-01-01 21:58                 ` Bernice Southey <[email protected]>
2026-01-07 09:45                   ` Amit Langote <[email protected]>
2026-01-07 12:16                     ` Amit Langote <[email protected]>

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox