public inbox for [email protected]  
help / color / mirror / Atom feed
From: Nisha Moond <[email protected]>
To: Amit Kapila <[email protected]>
Cc: shveta malik <[email protected]>
Cc: Dilip Kumar <[email protected]>
Cc: vignesh C <[email protected]>
Cc: Peter Smith <[email protected]>
Cc: Masahiko Sawada <[email protected]>
Cc: Bharath Rupireddy <[email protected]>
Cc: PostgreSQL Hackers <[email protected]>
Subject: Re: Proposal: Conflict log history table for Logical Replication
Date: Mon, 22 Jun 2026 16:32:16 +0530
Message-ID: <CABdArM6r0u6e2HQY3CQ15uZwOD2iTtVndecsNyK4R5tX9+eaNQ@mail.gmail.com> (raw)
In-Reply-To: <CAA4eK1JhZYRMP_YYa1j3uAK6L4v057JDuM0+YLABOgAOYuwM8Q@mail.gmail.com>
References: <CAFiTN-u5D5o_AGNbHRZHaOqAMWkxLf+hSk_r9X3gv6HbLOB5+g@mail.gmail.com>
	<CALj2ACViThGQDYi-yeqUeHqG2Pozn2AiyvtDtjE6zhhbM0KsEA@mail.gmail.com>
	<CAA4eK1+44b3vd_OWfiaVNtjf5Njb5cek09pmKRmttBByeg0NoA@mail.gmail.com>
	<CAFiTN-v3L0WacCDx5dkOSonaZQbJfstXL4HrCPD1ahRdUsRnSg@mail.gmail.com>
	<CALj2ACW63uuxh0fSoxEAF8OMWhz1dJKSkp268WJDzf5BUqCf5g@mail.gmail.com>
	<CAFiTN-s9WWLOhW1TO27NtJwGf0bh2+MWyp3NEkZFeN_S5_p_rA@mail.gmail.com>
	<CAA4eK1LxnsEx5sMbQkK5MHAgXKPROMQXQ0n=fKMwz+UsfKQaMQ@mail.gmail.com>
	<CAD21AoDj+c4LXf2y4ESR-gVyv9d8V0G4R8R9pn-PcmT5zPzYcg@mail.gmail.com>
	<CAA4eK1KokmAwNOL6bS-ip_E3F96PiQTjC4j-M+5vD1T6uUyi3Q@mail.gmail.com>
	<CAFiTN-vFKE8E_N6h+peX9DP92mxCeFdm5A9Esn4DkLmNcZ-dOA@mail.gmail.com>
	<CAFiTN-shLYf-fOTQ_dBf3Xfx05gxs_8d93MHZXyyz6w2Bg5geQ@mail.gmail.com>
	<CAFiTN-tEgkKQHUikn6iBFCYf7XOObR7ncUq=OVh7WEk=6P4ymw@mail.gmail.com>
	<CAFiTN-tQiakd8m+-d6WN6RpJXSv_JcropZ2oGzme4d1JudQhYg@mail.gmail.com>
	<CAJpy0uDKbYWt+YPADj=4fHEvrGEWgnG1n_YsiGT_EZiZf0VSAw@mail.gmail.com>
	<CAFiTN-t82BiXen+HfdR9jZyOpuSO92xonnUK=khXsiZWBfOxMA@mail.gmail.com>
	<CAJpy0uAu2paxGAEffD=vaBTW9Jqbtxxawb8K8FgiASfeKPnGog@mail.gmail.com>
	<CAJpy0uC0ZWgHOivJ102A1fMkppwK3RuSMafRPKyjwkmJrjhVUw@mail.gmail.com>
	<CAFiTN-vFV9-zajrwjYHYyFnyQsooOAXW4CpxB5f-iT3APjOtoQ@mail.gmail.com>
	<CAJpy0uBeU1dZgaqsSVKc=P=EVUKxRgVuHR8jDXFL-HLibbE-kQ@mail.gmail.com>
	<CAA4eK1+FOkOxhzVLAnDymoNjp4i98H-L1+ZsWDgJEv-ndnTzTA@mail.gmail.com>
	<CAFiTN-sVK6Bp+BawCJU_WpAXQSTX4OkKmce5EE4YNBgD-XSjZw@mail.gmail.com>
	<CAA4eK1LbjV0bctib9wUnBpEkC+2rZFPnGuRtrKuc5AtUAzum+A@mail.gmail.com>
	<CAFiTN-vq50N3QP9p3_SH+tJ8Pn=uRDb0X4qEcQZYcGW9AX88rQ@mail.gmail.com>
	<CAFiTN-u3+zRGPESP5kUUfa6NxaWh1HL-gd1225KJ0Uvzi1urow@mail.gmail.com>
	<CAA4eK1L4iNk6mNTC83PbYrRfUdtivH4U961PkdFfOO7mvc=USg@mail.gmail.com>
	<CAFiTN-v+Mh64UfR5zb5rwgyGm6HS80XRSZ_XeaWkg8=+s9o3Kg@mail.gmail.com>
	<CAFiTN-s3ZFHteQsiC3H4=AjTWxuwN-w69XQ3xL5X6YOMTua4pA@mail.gmail.com>
	<CAJpy0uDe724nY59j-8hMapZ_Fru1Wo-NucF4Ea1B3Jrw=+J+UQ@mail.gmail.com>
	<CAFiTN-uR=86L_5tyiA7n73EXCSCuDfQKfL5O=c8n7zZom8_ONQ@mail.gmail.com>
	<CAD21AoDfOS-J0M9WbM3D20eGbSPzbfLQ-9XoYkxO4AZ9twqyvg@mail.gmail.com>
	<CAFiTN-vMTg2X7vwfHLr5Gvy8ViV63_iaEcpHmM8V5GpA9-u8cg@mail.gmail.com>
	<CAA4eK1+b2Ws0e_ZYJsgZAPn7VWndxAK_YM_QMKcfXst3e7F6Jg@mail.gmail.com>
	<CAFiTN-v6hFKMPrSyTBsz=AtEETYMbOxrqvhZJsPQqKgQc4WCLw@mail.gmail.com>
	<CAA4eK1KV3rYkaxys5fh-PtE9kq5xrFbiaRpOSPoRgQG494ek+g@mail.gmail.com>
	<CAFiTN-utvu=QjY1QQ1a_TvkpkpvesMWo9M8wTFYLaOTPdpOJvw@mail.gmail.com>
	<CAA4eK1+HoSOEqNwT3twArPNx4_D7hSUoEg2LnYhX8n9iUwhXgQ@mail.gmail.com>
	<CAFiTN-tqmsfW0Sk=1RhzuduxqLrf9KEc8VOvBae+4aYxWTJwuA@mail.gmail.com>
	<CAA4eK1JmCQ=DHe3HsqpX+P3mGDUd_Z7E7oAxdstK6822W6tuCw@mail.gmail.com>
	<CAFiTN-uE4eAUYewuq3c5deAt3TtVork+H6rkUHRv68cOGr5rmQ@mail.gmail.com>
	<CAFiTN-sJbhPX+LbA8YuQeYJpfGA2XA+OKXf8jCm04RoJOyzLvw@mail.gmail.com>
	<CAJpy0uBPOyWj9itFjHzGXfrUuYS8KGmAvgdcV_9FPjWZ0EZz_w@mail.gmail.com>
	<CAFiTN-s=iLE4qM4qmw9yXKqW09R_c_HqaSGeZXJ2EaTVfXss+g@mail.gmail.com>
	<CAA4eK1KYo0vZpPSRc_4gVpa06-J39gxjs3tHFyckgkBfYJSfFA@mail.gmail.com>
	<CAFiTN-vrKc6OWzrg6yvpwYcj79k=zkrDp3uwiZzjwrWLJAq6tw@mail.gmail.com>
	<CAA4eK1LmvrfEgn1NUZZ=E3yMCjQdNZ5=_SBEry73-EmF6jM_PQ@mail.gmail.com>
	<CAFiTN-vjfub5b3PqPQzfOw9BSjm8jt28ott+Hoz9CrRxJHzYkg@mail.gmail.com>
	<CAFiTN-v=ANapYvRK+SOy2wJb4CSuD6Vb6_bTGuReM9Dv+3tucA@mail.gmail.com>
	<CALDaNm1zEYoSdf2Ns-=UJRw95E5sbfpB0oaNUWtRJN27Q1Knhw@mail.gmail.com>
	<CALDaNm3USsXVNBsfdpkp60HVgrTV4taWMk1xZYNBa7QUF=V0jg@mail.gmail.com>
	<CAFiTN-sNg9ghLNkB2Kn0SwBGOub9acc99XZZU_d5NAcyW-yrEg@mail.gmail.com>
	<CAJpy0uAF3EYcYdpTHdKMeXfvaPbNvnWrZUATrSLL1hqjao=33A@mail.gmail.com>
	<CAFiTN-uikggCKp2LscTorKY5d3KF9j93DW0xebDcRX86G+ZsSw@mail.gmail.com>
	<CAJpy0uDaOoVK8S3_xxTAcTDpfK1AY7tApw7nPOZG_gUz+DMi=Q@mail.gmail.com>
	<CAA4eK1+AdeC5B9xrAXSKWGtTh-0d8xdD=fZttmOBm+c8o8thAQ@mail.gmail.com>
	<CAFiTN-skBQAeuzuUd+PDK0Gqc8g+4x9ypBMwJhOrmW8ZCFKGSA@mail.gmail.com>
	<CAJpy0uCdrsW5T+okq7xTOVxagje7FW3DOeY5B0CGKYa5VqF_tQ@mail.gmail.com>
	<CAFiTN-u+_mFj9caYYFO7=_YHFXk5y=vvOm2H2=5hctYktmAVGA@mail.gmail.com>
	<CALDaNm1aivk9KgQ5daeF6YZzuE+0wWc2yb7wb6qikNyvfPN0Sg@mail.gmail.com>
	<CAJpy0uD6fTEUYJx3+yDbvB=VW7c5AaGoeSd7iwHdYYO=kYGn3g@mail.gmail.com>
	<CALDaNm2YOOdJ25X1sJ+DYz37K6Qi4g0ZNFHb_pQMF9UqancnEA@mail.gmail.com>
	<CAHut+PtMS5bENS0DVtBj+s3kUEOq61+hSkqLODjFB78egB0imQ@mail.gmail.com>
	<CAFiTN-s_M83sfs+MHHbUrMesjsCPN4JWxY5MChCEiY1U-u7=9g@mail.gmail.com>
	<CAFiTN-vj8NTm9w_L2XdhxJCub_RZw__YVUgfXa1B1kJzJctRNw@mail.gmail.com>
	<CAJpy0uBDLnfhuSiev8W9ZMFNTzUmqhds2dKayUpLoN-z1dtsLA@mail.gmail.com>
	<CAFiTN-uL9f0X+=Ep4BbAPvaTJA7S4XHM--G4BsnPJw4uJW7EGQ@mail.gmail.com>
	<CAJpy0uDG=t-y_m8t1zpBzfz9viP3K8dyQgkruaraVT85UtTkrg@mail.gmail.com>
	<CAFiTN-tR8Rhs8uhfbck0Ac4dd1MopvvYgjK39nWyNXRp9Z3Qww@mail.gmail.com>
	<CAA4eK1Kf15UpNmpTTE2XyX=9PE_oTpOoy5xqg3rFWbxwwP4Rbg@mail.gmail.com>
	<CAFiTN-tNqb0vjuadDz-as67ksSXa=aEK+JW=4b54RVmkUK1m2Q@mail.gmail.com>
	<CAFiTN-vDCxx6ydUFo59L8qNBbierg4as3TGPPiavR7UZjYurzA@mail.gmail.com>
	<CAHut+PsWms218ENALnytLEV4NpxjOrAYhChLDaMaeE65-vNgrQ@mail.gmail.com>
	<CAFiTN-v9i9RmDvdUmtMUow4=b+nr0k7LKMyEQ+6ZF=EVdfBhBA@mail.gmail.com>
	<CALDaNm2YTKwPDjt9OV49RgM0zbkWhMhNu228bj_7f+zzcPb-ew@mail.gmail.com>
	<CAFiTN-t_4XvofM3an-WmykqnPE+9wf9U+o2M7p1CWd9eXkN88Q@mail.gmail.com>
	<CAHut+PuaqNDfDu_3xkZR4OYxw-B7ew_WjpLXCBvMcSBJz2K6Xg@mail.gmail.com>
	<CAFiTN-uqNN9S_hRuda_th5MEpywa15g+XO00yM6tNJ-spGRRJw@mail.gmail.com>
	<CABdArM6QxXatkGefTHy__HgaYHBvbKesffeXzT8Vn-kvcvGK4w@mail.gmail.com>
	<CAFiTN-tgMWr=TGPhs9BxaPuSC_jhM7sJJ4fHedE5W6=h40jLfA@mail.gmail.com>
	<CABdArM5fgzfyC2mH3YGB8t8cJBHWqAG1BS6rJMk7mX-8=9d=Cg@mail.gmail.com>
	<CABdArM568KF4WXdFX_aZkCiDK8R71Wpep0gC2a+cV8BMobwkrg@mail.gmail.com>
	<CAFiTN-vQ0tu18BD3UmKPb0rzZyFMQAVgGbdpMA8iYLX7PZOqOA@mail.gmail.com>
	<CALDaNm20PDtmG2E3qaTC+YuL5twv+c9k573wL3sb=OwgmZphxQ@mail.gmail.com>
	<CAA4eK1LhOHa_TEznw+gFoq+w0vMvvsDG2g9Xq8Mwa8xZMY73og@mail.gmail.com>
	<CAFiTN-vPDqrQ2rHykNgd+groFxqwBYFQF97R-Co2EmtUkV6MTg@mail.gmail.com>
	<CAFiTN-vsd=wNiEPXPQhZnipAb--+mBUC01M-pcjBjbRockgCUA@mail.gmail.com>
	<CAJpy0uCjSq_gUCJBfURhqtB6bLvkKSUL-sVXpaGKjEapv5+t+Q@mail.gmail.com>
	<CAFiTN-uZ-LaStAY3NuCY-nb7GCB9joiHX7HtHEMseJ0xfnqVSg@mail.gmail.com>
	<CAFiTN-vhJxRW5NQ628oidnk0KtHwKt11dW9-+vxqpXLTgjiYiA@mail.gmail.com>
	<CALDaNm1cJURibYKY4+DuNosjM72C9oGheUF-roMyff__+AsKBw@mail.gmail.com>
	<CAJpy0uD1_77TDAFc4jE-94X-WUus7Q3gGU0pXfC+Tticq1hFvA@mail.gmail.com>
	<CAA4eK1LFcSc4XCj4mU-cv27F_6n6=+ehJ=YAsAnyBbz4Sv_tVg@mail.gmail.com>
	<CAFiTN-s5ZtjXKrSbam7TNWJ9Ax-kCancXcestAnx2by7dK0-UA@mail.gmail.com>
	<CAFiTN-u=Da32mXyz8jocEGtuLSG4ccXXj_aEzUTPp2zkLb3MVA@mail.gmail.com>
	<CALDaNm1qY5e0thfsDB2uWXqZn4hgTWTxiUDwcF1hWA-jodsKYg@mail.gmail.com>
	<CALDaNm1nFtv3dtdRdbqWo2Rf_av7XbxDfK1Orqjcqs_Su_cLRQ@mail.gmail.com>
	<CABdArM7R498qC5Fr42aU_q-2Sc5QsT4dyKgmO_f6Uy=8oCAFXA@mail.gmail.com>
	<CAFiTN-sRZ+Z_9B3ue2L4zkbcfmPjjcAjcR1C+px1PyAs+HGsSg@mail.gmail.com>
	<CAFiTN-sdcjf9xJ2M-=ab5e4y662tTmFFiP4gHL44tC9PcQozcw@mail.gmail.com>
	<CALDaNm2WNjaNxUijVkvT6y69D62rfCu8OMwU-Pf-84un2r_=ig@mail.gmail.com>
	<CAHut+PvEP5uUR13xJ3gbNKGU49=Rg32DXMGZ2wL9jTcKHyN_=Q@mail.gmail.com>
	<CALDaNm3Jb5AQTsFJFxYZZJCaheT7qToCZkEALfW-vsMMFxjOyQ@mail.gmail.com>
	<CAHut+PtQn5U9i00qvBmjo0KBxyb+ZmBb38NzF91KnX4J86Jg_g@mail.gmail.com>
	<CALDaNm1a1gzy0L38U394_4OFwGUS8ALgSONYj++VLimY0g9piQ@mail.gmail.com>
	<CABdArM5X63AdtS99QKGjVijUd_Q_dV8QUDSo4nTHKJjn3JwtAg@mail.gmail.com>
	<CAA4eK1+h=QV4Zi=PW8Zt2D6be5ki5Mu2HgdXcfUophptx6Mt_A@mail.gmail.com>
	<CAFiTN-s-tuxar9Dp5He0CFa1pzfy1fmiwcBj6PtwD0hDodE5ng@mail.gmail.com>
	<CAFiTN-sx=k+Th=uYsrLcS6YMZbPVi9Wrggn1w2Nzf9MLEU7YRQ@mail.gmail.com>
	<CAFiTN-u5pcgAhXyJgj+p7-xmShtp0i8xA000tzjCLFQp_zMXUA@mail.gmail.com>
	<CAFiTN-tRpS7b3qFqckqFtHETj0jyzj-8SxC1arjfwf-hQd47PQ@mail.gmail.com>
	<CABdArM5Ka_m_GWhL_zZbeDPKmL-Wezwb4A-NHnO-v-fRDuhA-Q@mail.gmail.com>
	<CAFiTN-sqEMAbZ2pTt=zMa=918NV7HVeXF4bCOF+swtzKnTy5yQ@mail.gmail.com>
	<CALDaNm0WX0Vqoy2UQZh-2TpWraf4OYn28kWe9aGR=vxKwLA+bw@mail.gmail.com>
	<CAJpy0uAA_XszCAcoBuCUM3VobD39DbMDwCPUT+XW7wFfE+_E8w@mail.gmail.com>
	<CAFiTN-urKwsdyFvwz1go_C7jJFtLA8TJEhnaECAjuqkRk_1cXA@mail.gmail.com>
	<CAJpy0uBhJGqD+OyA9Uk8bHyk61XWHEf3Le1QxkotwOLcQCqaZA@mail.gmail.com>
	<CAFiTN-sbAXS6deUarPDunj2U+A6Dvhw_TASy4oMv4Tc63-_T9g@mail.gmail.com>
	<CAJpy0uBaW7ziTsHOu_z37-epihxx3qGMjqdV+_-Z-RFjq5EOGg@mail.gmail.com>
	<CAFiTN-sxVPbuqHjz99NGTz5UU0xgegsvpRa6=NkbP8_iW+X6-A@mail.gmail.com>
	<CAJpy0uBs6D9ojMpz4MWgrxDbvRxqnvN+B+JnMNezBtuvhk_j9A@mail.gmail.com>
	<CAFiTN-urmbTtk40RfLU8UMYtzk-_DLXwui3_G+TWp6XYKBphjw@mail.gmail.com>
	<CABdArM5dtMEUw14-aDSht2Vh1tsgO67t_ZU8VQp=Ut7MwK5aEQ@mail.gmail.com>
	<CAJpy0uBSY7zTH=4TvAOS=kj9vivBUc9NO+Vp6KNw-Na9RiAsMg@mail.gmail.com>
	<CAFiTN-vqyo2=pgA_jUyQoWrOwUXxw1s3aYHcZBr_EHsztZN9jg@mail.gmail.com>
	<CAJpy0uDGuaEvBym5VnE4mXJ_meo3jR7uG=c6wVbDAiuTjHcwrg@mail.gmail.com>
	<CAA4eK1JhZYRMP_YYa1j3uAK6L4v057JDuM0+YLABOgAOYuwM8Q@mail.gmail.com>

On Sun, Jun 21, 2026 at 7:19 PM Amit Kapila <[email protected]> wrote:
>
> On Thu, Jun 18, 2026 at 9:33 AM shveta malik <[email protected]> wrote:
> >
> > On Tue, Jun 16, 2026 at 6:54 PM Dilip Kumar <[email protected]> wrote:
> > >
> > > IMHO we should just log WARNING and continue the apply worker on
> > > conflict insertion failure, lets see what other thinks on this.
> > >
> >
> > I have the same opinion. Allowing CLT to block the apply worker would
> > be undesirable; CLT is a history/logs collection feature and should
> > not interrupt core logical replication work.
> >
>
> I think the insert can fail in rare cases like disk getting full while
> writing WAL or some internal memory ERROR and the ERROR could be
> persistent which means the LOG will be filled with the same WARNING if
> there are many conflicts. Also, users may not like missing out on
> conflict information. So, we can ERROR out and let users fix the
> situation. Additionally, the nested try-catch to downgrade ERROR to
> WARNING also looks ugly and a source of future bugs and maintenance
> burden. The attached patch tries to fix this by ERRORing out on
> insertion failure and attaching the required conflict info as a
> context of ERROR. The patch also improved the ReportApplyConflict()
> non-ERROR paths by displaying the conflict information in server LOGs
> before inserting the same into CLT so that if insertion fails, the
> complete conflict info can be present in server LOGs. See
> v52-1-amit.Improve-error-handling-for-conflict-log-table-ins.
>
> Additionally, there is another problem with 0003 where when a parallel
> apply worker hits an ERROR-level conflict it logs the conflict to the
> conflict log table in a new transaction in its error path, after
> aborting the failed apply transaction.  But the leader detects worker
> failure in pa_wait_for_xact_finish() by waiting on the worker's
> transaction lock, and AbortOutOfAnyTransaction() releases that lock:
> the leader unblocks, sees a non-finished state, raises "lost
> connection to the logical replication parallel apply worker", and
> tears the worker down -- which can SIGTERM it mid-insert and lose the
> conflict log row, besides being a misleading message. The attached
> top-up patch v52-2-amit.fix_parallel_apply_logging fixes that by
> introducing PARALLEL_TRANS_ERROR state.

I reproduced the above issue and verified the fix for it in
v52-2-amit.fix_parallel_apply_logging. Here is a TAP test for the
same.
The attached top-up patch applies on top of the latest v53-0005 patch.

--
Thanks,
Nisha

From bae4f16a7a6a80d2d27ba26a7eac70b9dc089785 Mon Sep 17 00:00:00 2001
From: Nisha Moond <[email protected]>
Date: Mon, 22 Jun 2026 11:50:12 +0530
Subject: [PATCH v53] Add TAP test for parallel-apply deferred CLT insert race

Test the race where, on an ERROR-level conflict, a parallel apply (PA)
worker logs the deferred conflict row in a fresh transaction in its
PG_CATCH path.  AbortOutOfAnyTransaction() in the PG_CATCH releases
the worker's transaction lock before the deferred insert runs, so the
leader's pa_wait_for_xact_finish() unblocks, sees a non-finished state,
raises the "lost connection to the logical replication parallel
apply worker", and SIGTERMs the PA mid-insert -- the deferred conflict
row is lost.

Also verifies that PA correctly inserts the conflict into the CLT and also
reports it in the server log with fix.
---
 src/backend/replication/logical/conflict.c    |   8 +
 src/test/subscription/meson.build             |   1 +
 .../t/039_pa_conflict_log_lock_wait.pl        | 204 ++++++++++++++++++
 3 files changed, 213 insertions(+)
 create mode 100644 src/test/subscription/t/039_pa_conflict_log_lock_wait.pl

diff --git a/src/backend/replication/logical/conflict.c b/src/backend/replication/logical/conflict.c
index c2c15f055e6..6ec57180813 100644
--- a/src/backend/replication/logical/conflict.c
+++ b/src/backend/replication/logical/conflict.c
@@ -31,6 +31,7 @@
 #include "storage/lmgr.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
+#include "utils/injection_point.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/pg_lsn.h"
@@ -484,6 +485,13 @@ ProcessPendingConflictLogTuple(void)
 	StartTransactionCommand();
 	PushActiveSnapshot(GetTransactionSnapshot());
 
+	/*
+	 * Test hook: pause here so a TAP test can take a conflicting lock on
+	 * the conflict log table before this transaction tries to open it.
+	 * See src/test/subscription/t/039_pa_conflict_log_lock_wait.pl.
+	 */
+	INJECTION_POINT("clt-pending-flush-before-open", NULL);
+
 	/* Open conflict log table and insert the tuple */
 	conflictlogrel = GetConflictLogDestAndTable(&dest);
 	Assert(conflictlogrel);
diff --git a/src/test/subscription/meson.build b/src/test/subscription/meson.build
index e71e95c6297..225f90a37b3 100644
--- a/src/test/subscription/meson.build
+++ b/src/test/subscription/meson.build
@@ -48,6 +48,7 @@ tests += {
       't/036_sequences.pl',
       't/037_except.pl',
       't/038_walsnd_shutdown_timeout.pl',
+      't/039_pa_conflict_log_lock_wait.pl',
       't/100_bugs.pl',
     ],
   },
diff --git a/src/test/subscription/t/039_pa_conflict_log_lock_wait.pl b/src/test/subscription/t/039_pa_conflict_log_lock_wait.pl
new file mode 100644
index 00000000000..d69f5b43656
--- /dev/null
+++ b/src/test/subscription/t/039_pa_conflict_log_lock_wait.pl
@@ -0,0 +1,204 @@
+# Copyright (c) 2026, PostgreSQL Global Development Group
+
+# Test that a parallel apply (PA) worker correctly inserts a deferred
+# conflict-log tuple even when, by the time it reaches
+# ProcessPendingConflictLogTuple(), the conflict log table is held under
+# ACCESS EXCLUSIVE by another session.
+#
+# The window we want to exercise is narrow: PA must already be past
+# ReportApplyConflict() (so the error has fired and PA is in PG_CATCH),
+# and the locker must take the CLT lock *before* PA reaches the second
+# CLT open inside ProcessPendingConflictLogTuple().  An injection point
+# pauses PA at exactly that point so the locker can grab the lock first;
+# without it, the locker either takes the lock too early (PA blocks in
+# the inline CLT open inside ReportApplyConflict, before the error has
+# fired) or too late (PA inserts before the lock is taken).
+
+use strict;
+use warnings FATAL => 'all';
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+if ($ENV{enable_injection_points} ne 'yes')
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
+# ---------------------------------------------------------------------
+# Set up publisher and subscriber.  Force every transaction to stream so
+# the conflict is handled by a parallel apply worker rather than the
+# leader.
+# ---------------------------------------------------------------------
+my $node_pub = PostgreSQL::Test::Cluster->new('publisher');
+$node_pub->init(allows_streaming => 'logical');
+$node_pub->append_conf('postgresql.conf', q{
+debug_logical_replication_streaming = immediate
+logical_decoding_work_mem = 64kB
+});
+$node_pub->start;
+
+my $node_sub = PostgreSQL::Test::Cluster->new('subscriber');
+$node_sub->init;
+$node_sub->append_conf('postgresql.conf', q{
+shared_preload_libraries = 'injection_points'
+max_logical_replication_workers = 4
+max_parallel_apply_workers_per_subscription = 2
+});
+$node_sub->start;
+
+# Replicated table; the pre-existing row on the subscriber is what makes
+# the publisher's INSERT (id=1) trigger an INSERT_EXISTS conflict.
+$node_pub->safe_psql('postgres', q{
+	CREATE TABLE t (id int PRIMARY KEY, val text);
+	ALTER TABLE t REPLICA IDENTITY FULL;
+	CREATE PUBLICATION p FOR TABLE t;
+});
+
+$node_sub->safe_psql('postgres', q{
+	CREATE TABLE t (id int PRIMARY KEY, val text);
+	INSERT INTO t VALUES (1, 'pre-existing');
+	CREATE EXTENSION injection_points;
+});
+
+my $pub_connstr = $node_pub->connstr . ' dbname=postgres';
+$node_sub->safe_psql('postgres', qq{
+	CREATE SUBSCRIPTION s
+	CONNECTION '$pub_connstr'
+	PUBLICATION p
+	WITH (streaming = parallel,
+	      conflict_log_destination = 'all',
+	      disable_on_error = true);
+});
+
+$node_sub->wait_for_subscription_sync($node_pub, 's');
+
+# ---------------------------------------------------------------------
+# Send a non-conflicting INSERT and then wait until pg_subscription_rel
+# reaches 'r' (ready) on every relation.  pa_can_start() requires
+# AllTablesyncsReady(), which returns true only when every
+# pg_subscription_rel row is 'r'.  The 's' (syncdone) -> 'r' transition
+# fires inside ProcessSyncingTablesForApply, which only flips the state
+# when the apply worker's last_received LSN has advanced past the
+# tablesync end LSN -- so we need a triggering commit on the publisher
+# to drive last_received forward.  Without this step, the conflict txn
+# below would arrive while the table is still 's', pa_can_start() would
+# return false, the leader would spool to file and apply serially, and
+# no parallel apply worker would ever spawn.
+# ---------------------------------------------------------------------
+$node_pub->safe_psql('postgres', "INSERT INTO t VALUES (1000, 'warmup');");
+$node_sub->poll_query_until('postgres',
+	"SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r');"
+) or die "subscription tables did not reach READY state";
+
+# ---------------------------------------------------------------------
+# Look up the per-subscription CLT name.
+# ---------------------------------------------------------------------
+my $sub_oid = $node_sub->safe_psql('postgres',
+	"SELECT oid FROM pg_subscription WHERE subname = 's'");
+my $clt = "pg_conflict.pg_conflict_log_$sub_oid";
+note "conflict log table for subscription s: $clt";
+
+# ---------------------------------------------------------------------
+# Arm the injection point.  This pauses the PA worker inside
+# ProcessPendingConflictLogTuple() — i.e. *after* the error has fired
+# and the PG_CATCH has run, *before* the second open of the CLT.  This
+# is the exact window the deferred-insert path needs to be tested in.
+# ---------------------------------------------------------------------
+$node_sub->safe_psql('postgres',
+	"SELECT injection_points_attach('clt-pending-flush-before-open', 'wait');");
+
+# ---------------------------------------------------------------------
+# Drive the conflict.  PA receives the streamed txn, hits INSERT_EXISTS
+# inside ReportApplyConflict (which opens/closes the CLT cleanly while
+# preparing the deferred tuple), then ereport(ERROR) fires, PG_CATCH
+# runs, and PA enters ProcessPendingConflictLogTuple — where it pauses
+# at the injection point.
+# ---------------------------------------------------------------------
+my $log_offset = -s $node_sub->logfile;
+
+$node_pub->safe_psql('postgres', q{
+	BEGIN;
+	INSERT INTO t SELECT g, repeat('x', 1000) FROM generate_series(2, 200) g;
+	INSERT INTO t VALUES (1, 'conflict');
+	COMMIT;
+});
+
+# Wait until PA is parked at the injection point.
+$node_sub->wait_for_event('logical replication parallel worker',
+	'clt-pending-flush-before-open');
+
+# ---------------------------------------------------------------------
+# Now take ACCESS EXCLUSIVE on the CLT.  TRUNCATE is permitted on CLTs;
+# At this point the CLT is empty, so the TRUNCATE is effectively a no-op
+# that just acquires the lock.
+# Because PA is paused at the injection point, this lock is guaranteed
+# to be acquired *before* PA tries to open the CLT.
+# ---------------------------------------------------------------------
+my $locker = $node_sub->background_psql('postgres');
+$locker->query_until(qr/locker_ready/, qq{
+	\\echo locker_ready
+	BEGIN;
+	TRUNCATE $clt;
+});
+
+# ---------------------------------------------------------------------
+# Wake the PA from the injection point.  It will now try to open the
+# CLT inside ProcessPendingConflictLogTuple and block on the lock the
+# locker session holds.
+# ---------------------------------------------------------------------
+$node_sub->safe_psql('postgres',
+	"SELECT injection_points_wakeup('clt-pending-flush-before-open');
+	 SELECT injection_points_detach('clt-pending-flush-before-open');");
+
+# Confirm the PA worker is actually parked waiting on the CLT lock —
+# this verifies we are exercising the deferred-insert lock-wait path,
+# not racing past it.
+my $clt_oid = $node_sub->safe_psql('postgres',
+	"SELECT '$clt'::regclass::oid");
+ok( $node_sub->poll_query_until(
+		'postgres', qq{
+		SELECT EXISTS (
+			SELECT 1
+			FROM pg_locks l
+			JOIN pg_stat_activity a ON l.pid = a.pid
+			WHERE NOT l.granted
+			  AND l.relation = $clt_oid
+			  AND a.backend_type = 'logical replication parallel worker'
+		);
+	}, 't'),
+	'PA worker is blocked on the CLT lock inside ProcessPendingConflictLogTuple');
+
+# ---------------------------------------------------------------------
+# Release the lock.  PA wakes, inserts the deferred row, commits its
+# CLT txn, re-throws the original error to the leader, and the leader
+# disables the subscription (disable_on_error = true).
+# ---------------------------------------------------------------------
+$locker->query_safe('COMMIT;');
+ok($locker->quit, 'locker session closed cleanly');
+
+ok( $node_sub->poll_query_until(
+		'postgres',
+		"SELECT subenabled = false FROM pg_subscription WHERE subname = 's'",
+		't'),
+	'subscription disabled after the conflict');
+
+# ---------------------------------------------------------------------
+# Verify the deferred conflict log tuple survived the lock wait.
+# ---------------------------------------------------------------------
+my $rows = $node_sub->safe_psql('postgres',
+	"SELECT count(*) FROM $clt WHERE conflict_type = 'insert_exists'");
+is($rows, '1',
+	'deferred CLT insert by PA worker succeeded after lock release');
+
+# ---------------------------------------------------------------------
+# Also verify the conflict was reported in the server log
+# (conflict_log_destination = 'all' logs to both the table and the log).
+# ---------------------------------------------------------------------
+my $log_contents = slurp_file($node_sub->logfile, $log_offset);
+like(
+	$log_contents,
+	qr/ERROR:\s+conflict detected on relation "public\.t": conflict=insert_exists/,
+	'conflict reported in server log');
+
+done_testing();
-- 
2.50.1 (Apple Git-155)



Attachments:

  [text/plain] v53_Add-TAP-test-for-parallel-apply-deferred-CLT-ins.txt (11.6K, 2-v53_Add-TAP-test-for-parallel-apply-deferred-CLT-ins.txt)
  download | inline diff:
From bae4f16a7a6a80d2d27ba26a7eac70b9dc089785 Mon Sep 17 00:00:00 2001
From: Nisha Moond <[email protected]>
Date: Mon, 22 Jun 2026 11:50:12 +0530
Subject: [PATCH v53] Add TAP test for parallel-apply deferred CLT insert race

Test the race where, on an ERROR-level conflict, a parallel apply (PA)
worker logs the deferred conflict row in a fresh transaction in its
PG_CATCH path.  AbortOutOfAnyTransaction() in the PG_CATCH releases
the worker's transaction lock before the deferred insert runs, so the
leader's pa_wait_for_xact_finish() unblocks, sees a non-finished state,
raises the "lost connection to the logical replication parallel
apply worker", and SIGTERMs the PA mid-insert -- the deferred conflict
row is lost.

Also verifies that PA correctly inserts the conflict into the CLT and also
reports it in the server log with fix.
---
 src/backend/replication/logical/conflict.c    |   8 +
 src/test/subscription/meson.build             |   1 +
 .../t/039_pa_conflict_log_lock_wait.pl        | 204 ++++++++++++++++++
 3 files changed, 213 insertions(+)
 create mode 100644 src/test/subscription/t/039_pa_conflict_log_lock_wait.pl

diff --git a/src/backend/replication/logical/conflict.c b/src/backend/replication/logical/conflict.c
index c2c15f055e6..6ec57180813 100644
--- a/src/backend/replication/logical/conflict.c
+++ b/src/backend/replication/logical/conflict.c
@@ -31,6 +31,7 @@
 #include "storage/lmgr.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
+#include "utils/injection_point.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/pg_lsn.h"
@@ -484,6 +485,13 @@ ProcessPendingConflictLogTuple(void)
 	StartTransactionCommand();
 	PushActiveSnapshot(GetTransactionSnapshot());
 
+	/*
+	 * Test hook: pause here so a TAP test can take a conflicting lock on
+	 * the conflict log table before this transaction tries to open it.
+	 * See src/test/subscription/t/039_pa_conflict_log_lock_wait.pl.
+	 */
+	INJECTION_POINT("clt-pending-flush-before-open", NULL);
+
 	/* Open conflict log table and insert the tuple */
 	conflictlogrel = GetConflictLogDestAndTable(&dest);
 	Assert(conflictlogrel);
diff --git a/src/test/subscription/meson.build b/src/test/subscription/meson.build
index e71e95c6297..225f90a37b3 100644
--- a/src/test/subscription/meson.build
+++ b/src/test/subscription/meson.build
@@ -48,6 +48,7 @@ tests += {
       't/036_sequences.pl',
       't/037_except.pl',
       't/038_walsnd_shutdown_timeout.pl',
+      't/039_pa_conflict_log_lock_wait.pl',
       't/100_bugs.pl',
     ],
   },
diff --git a/src/test/subscription/t/039_pa_conflict_log_lock_wait.pl b/src/test/subscription/t/039_pa_conflict_log_lock_wait.pl
new file mode 100644
index 00000000000..d69f5b43656
--- /dev/null
+++ b/src/test/subscription/t/039_pa_conflict_log_lock_wait.pl
@@ -0,0 +1,204 @@
+# Copyright (c) 2026, PostgreSQL Global Development Group
+
+# Test that a parallel apply (PA) worker correctly inserts a deferred
+# conflict-log tuple even when, by the time it reaches
+# ProcessPendingConflictLogTuple(), the conflict log table is held under
+# ACCESS EXCLUSIVE by another session.
+#
+# The window we want to exercise is narrow: PA must already be past
+# ReportApplyConflict() (so the error has fired and PA is in PG_CATCH),
+# and the locker must take the CLT lock *before* PA reaches the second
+# CLT open inside ProcessPendingConflictLogTuple().  An injection point
+# pauses PA at exactly that point so the locker can grab the lock first;
+# without it, the locker either takes the lock too early (PA blocks in
+# the inline CLT open inside ReportApplyConflict, before the error has
+# fired) or too late (PA inserts before the lock is taken).
+
+use strict;
+use warnings FATAL => 'all';
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+if ($ENV{enable_injection_points} ne 'yes')
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
+# ---------------------------------------------------------------------
+# Set up publisher and subscriber.  Force every transaction to stream so
+# the conflict is handled by a parallel apply worker rather than the
+# leader.
+# ---------------------------------------------------------------------
+my $node_pub = PostgreSQL::Test::Cluster->new('publisher');
+$node_pub->init(allows_streaming => 'logical');
+$node_pub->append_conf('postgresql.conf', q{
+debug_logical_replication_streaming = immediate
+logical_decoding_work_mem = 64kB
+});
+$node_pub->start;
+
+my $node_sub = PostgreSQL::Test::Cluster->new('subscriber');
+$node_sub->init;
+$node_sub->append_conf('postgresql.conf', q{
+shared_preload_libraries = 'injection_points'
+max_logical_replication_workers = 4
+max_parallel_apply_workers_per_subscription = 2
+});
+$node_sub->start;
+
+# Replicated table; the pre-existing row on the subscriber is what makes
+# the publisher's INSERT (id=1) trigger an INSERT_EXISTS conflict.
+$node_pub->safe_psql('postgres', q{
+	CREATE TABLE t (id int PRIMARY KEY, val text);
+	ALTER TABLE t REPLICA IDENTITY FULL;
+	CREATE PUBLICATION p FOR TABLE t;
+});
+
+$node_sub->safe_psql('postgres', q{
+	CREATE TABLE t (id int PRIMARY KEY, val text);
+	INSERT INTO t VALUES (1, 'pre-existing');
+	CREATE EXTENSION injection_points;
+});
+
+my $pub_connstr = $node_pub->connstr . ' dbname=postgres';
+$node_sub->safe_psql('postgres', qq{
+	CREATE SUBSCRIPTION s
+	CONNECTION '$pub_connstr'
+	PUBLICATION p
+	WITH (streaming = parallel,
+	      conflict_log_destination = 'all',
+	      disable_on_error = true);
+});
+
+$node_sub->wait_for_subscription_sync($node_pub, 's');
+
+# ---------------------------------------------------------------------
+# Send a non-conflicting INSERT and then wait until pg_subscription_rel
+# reaches 'r' (ready) on every relation.  pa_can_start() requires
+# AllTablesyncsReady(), which returns true only when every
+# pg_subscription_rel row is 'r'.  The 's' (syncdone) -> 'r' transition
+# fires inside ProcessSyncingTablesForApply, which only flips the state
+# when the apply worker's last_received LSN has advanced past the
+# tablesync end LSN -- so we need a triggering commit on the publisher
+# to drive last_received forward.  Without this step, the conflict txn
+# below would arrive while the table is still 's', pa_can_start() would
+# return false, the leader would spool to file and apply serially, and
+# no parallel apply worker would ever spawn.
+# ---------------------------------------------------------------------
+$node_pub->safe_psql('postgres', "INSERT INTO t VALUES (1000, 'warmup');");
+$node_sub->poll_query_until('postgres',
+	"SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r');"
+) or die "subscription tables did not reach READY state";
+
+# ---------------------------------------------------------------------
+# Look up the per-subscription CLT name.
+# ---------------------------------------------------------------------
+my $sub_oid = $node_sub->safe_psql('postgres',
+	"SELECT oid FROM pg_subscription WHERE subname = 's'");
+my $clt = "pg_conflict.pg_conflict_log_$sub_oid";
+note "conflict log table for subscription s: $clt";
+
+# ---------------------------------------------------------------------
+# Arm the injection point.  This pauses the PA worker inside
+# ProcessPendingConflictLogTuple() — i.e. *after* the error has fired
+# and the PG_CATCH has run, *before* the second open of the CLT.  This
+# is the exact window the deferred-insert path needs to be tested in.
+# ---------------------------------------------------------------------
+$node_sub->safe_psql('postgres',
+	"SELECT injection_points_attach('clt-pending-flush-before-open', 'wait');");
+
+# ---------------------------------------------------------------------
+# Drive the conflict.  PA receives the streamed txn, hits INSERT_EXISTS
+# inside ReportApplyConflict (which opens/closes the CLT cleanly while
+# preparing the deferred tuple), then ereport(ERROR) fires, PG_CATCH
+# runs, and PA enters ProcessPendingConflictLogTuple — where it pauses
+# at the injection point.
+# ---------------------------------------------------------------------
+my $log_offset = -s $node_sub->logfile;
+
+$node_pub->safe_psql('postgres', q{
+	BEGIN;
+	INSERT INTO t SELECT g, repeat('x', 1000) FROM generate_series(2, 200) g;
+	INSERT INTO t VALUES (1, 'conflict');
+	COMMIT;
+});
+
+# Wait until PA is parked at the injection point.
+$node_sub->wait_for_event('logical replication parallel worker',
+	'clt-pending-flush-before-open');
+
+# ---------------------------------------------------------------------
+# Now take ACCESS EXCLUSIVE on the CLT.  TRUNCATE is permitted on CLTs;
+# At this point the CLT is empty, so the TRUNCATE is effectively a no-op
+# that just acquires the lock.
+# Because PA is paused at the injection point, this lock is guaranteed
+# to be acquired *before* PA tries to open the CLT.
+# ---------------------------------------------------------------------
+my $locker = $node_sub->background_psql('postgres');
+$locker->query_until(qr/locker_ready/, qq{
+	\\echo locker_ready
+	BEGIN;
+	TRUNCATE $clt;
+});
+
+# ---------------------------------------------------------------------
+# Wake the PA from the injection point.  It will now try to open the
+# CLT inside ProcessPendingConflictLogTuple and block on the lock the
+# locker session holds.
+# ---------------------------------------------------------------------
+$node_sub->safe_psql('postgres',
+	"SELECT injection_points_wakeup('clt-pending-flush-before-open');
+	 SELECT injection_points_detach('clt-pending-flush-before-open');");
+
+# Confirm the PA worker is actually parked waiting on the CLT lock —
+# this verifies we are exercising the deferred-insert lock-wait path,
+# not racing past it.
+my $clt_oid = $node_sub->safe_psql('postgres',
+	"SELECT '$clt'::regclass::oid");
+ok( $node_sub->poll_query_until(
+		'postgres', qq{
+		SELECT EXISTS (
+			SELECT 1
+			FROM pg_locks l
+			JOIN pg_stat_activity a ON l.pid = a.pid
+			WHERE NOT l.granted
+			  AND l.relation = $clt_oid
+			  AND a.backend_type = 'logical replication parallel worker'
+		);
+	}, 't'),
+	'PA worker is blocked on the CLT lock inside ProcessPendingConflictLogTuple');
+
+# ---------------------------------------------------------------------
+# Release the lock.  PA wakes, inserts the deferred row, commits its
+# CLT txn, re-throws the original error to the leader, and the leader
+# disables the subscription (disable_on_error = true).
+# ---------------------------------------------------------------------
+$locker->query_safe('COMMIT;');
+ok($locker->quit, 'locker session closed cleanly');
+
+ok( $node_sub->poll_query_until(
+		'postgres',
+		"SELECT subenabled = false FROM pg_subscription WHERE subname = 's'",
+		't'),
+	'subscription disabled after the conflict');
+
+# ---------------------------------------------------------------------
+# Verify the deferred conflict log tuple survived the lock wait.
+# ---------------------------------------------------------------------
+my $rows = $node_sub->safe_psql('postgres',
+	"SELECT count(*) FROM $clt WHERE conflict_type = 'insert_exists'");
+is($rows, '1',
+	'deferred CLT insert by PA worker succeeded after lock release');
+
+# ---------------------------------------------------------------------
+# Also verify the conflict was reported in the server log
+# (conflict_log_destination = 'all' logs to both the table and the log).
+# ---------------------------------------------------------------------
+my $log_contents = slurp_file($node_sub->logfile, $log_offset);
+like(
+	$log_contents,
+	qr/ERROR:\s+conflict detected on relation "public\.t": conflict=insert_exists/,
+	'conflict reported in server log');
+
+done_testing();
-- 
2.50.1 (Apple Git-155)



view thread (154+ messages)

reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected], [email protected], [email protected], [email protected], [email protected], [email protected], [email protected], [email protected], [email protected]
  Subject: Re: Proposal: Conflict log history table for Logical Replication
  In-Reply-To: <CABdArM6r0u6e2HQY3CQ15uZwOD2iTtVndecsNyK4R5tX9+eaNQ@mail.gmail.com>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox