public inbox for [email protected]  
help / color / mirror / Atom feed
From: Vishal Prasanna <[email protected]>
To: pgsql-bugs <[email protected]>
Subject: [BUG] Assert failure in ReorderBufferReturnTXN during logical decoding due to leaked specinsert change
Date: Thu, 19 Feb 2026 19:13:10 +0530
Message-ID: <[email protected]> (raw)

Hi,



We recently encountered an issue with the Postgres server. After we dropped a publication

that was being used by a subscriber, the walsender process on the publisher started crashing

with an assertion failure in `ReorderBufferReturnTXN()` while decoding changes from the replication slot.



The error message:

"TRAP: failed Assert("txn->size == 0"), File: "reorderbuffer.c", Line: 494"



Issue observed in PostgreSQL 17.7 with assert-enabled builds.







TRAP: failed Assert("txn->size == 0"), File: "reorderbuffer.c", Line: 494, PID: 96941

0   postgres                             ExceptionalCondition + 216

1   postgres                             ReorderBufferReturnTXN + 284

2   postgres                             ReorderBufferCleanupTXN + 1292

3   postgres                             ReorderBufferProcessTXN + 4304

4   postgres                             ReorderBufferReplay + 284

5   postgres                             ReorderBufferCommit + 128

6   postgres                             DecodeCommit + 584

7   postgres                             xact_decode + 408

8   postgres                             LogicalDecodingProcessRecord + 192

9   postgres                             XLogSendLogical + 204

10  postgres                             WalSndLoop + 256

11  postgres                             StartLogicalReplication + 636

12  postgres                             exec_replication_command + 1384

13  postgres                             PostgresMain + 2476

14  postgres                             BackendInitialize + 0

15  postgres                             postmaster_child_launch + 304

16  postgres                             BackendStartup + 448

17  postgres                             ServerLoop + 372

18  postgres                             PostmasterMain + 6396

19  postgres                             startup_hacks + 0

20  dyld                                 start + 7184





Simplified steps to reproduce:



Step 1: Create a table, set up a logical replication slot, and execute an `INSERT ... ON CONFLICT ... DO UPDATE` statement.



```

CREATE TABLE test_updates (id INT PRIMARY KEY, value TEXT);

SELECT * FROM pg_create_logical_replication_slot('testing_slot', 'pgoutput');

INSERT INTO test_updates (id, value) VALUES (1, 'first_insert')

ON CONFLICT(id) DO UPDATE SET value = excluded.value;

```



Step 2: Start logical decoding using `pg_logical_slot_peek_binary_changes` on the slot,

            referencing a publication (`pub_1`) that does not exist.



```

SELECT *

FROM pg_logical_slot_peek_binary_changes('testing_slot', NULL, NULL, 'proto_version', '1', 'publication_names', 'pub_1');

```



After Step 2, the Postgres server crashes with the assertion failure shown above.



Reason for server crash:

1. In `ReorderBufferProcessTXN()`, when a `REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT`

is encountered, the change is unlinked from the list by `dlist_delete()` and stored in `specinsert`.  



2. Next, when a `REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM` is encountered,

the stored `specinsert` change is retrieved and applied as a regular insert change.



3. Since publication `pub_1` does not exist, an error is raised when applying the change

and caught by the `PG_CATCH` block in `ReorderBufferProcessTXN()`. The error call stack is:



```

ReorderBufferProcessTXN()

  ReorderBufferApplyChange()

    → rb->apply_change()           [pgoutput_change()]

      → get_rel_sync_entry()

        → LoadPublications()

          → GetPublicationByName()

            → get_publication_oid()   ← throws ERROR here

```



4. In the `PG_CATCH` block, the `specinsert` change is cleaned up only for

streaming/prepared transactions (via `ReorderBufferResetTXN()`) but not for non-streaming transactions.

In the `else` branch at line `2697`, `ReorderBufferCleanupTXN()` is called, which iterates through

the remaining changes and frees them. However, since the `specinsert` change was already unlinked

from the list, it is never found and never freed. This leaked change is still accounted for in `txn->size`,

causing the assertion failure.





Proposed Solution: Free the pending `specinsert` change in the `else` branch before calling `ReorderBufferCleanupTXN()`.

Since the `specinsert` change has already been unlinked, `ReorderBufferCleanupTXN()` cannot find or free it.

Explicitly freeing it here prevents the `Assert(txn->size == 0)` failure in `ReorderBufferReturnTXN()`.



```

--- a/src/backend/replication/logical/reorderbuffer.c

+++ b/src/backend/replication/logical/reorderbuffer.c

@@ -2691,12 +2691,18 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,

                        ReorderBufferResetTXN(rb, txn, snapshot_now,

                                                                  command_id, prev_lsn,

                                                                  specinsert);

                }

                else

                {

+                       /* Return the spec insert change before cleaning up the transaction */

+                       if (specinsert != NULL)

+                       {

+                               ReorderBufferReturnChange(rb, specinsert, true);

+                               specinsert = NULL;

+                       }

                        ReorderBufferCleanupTXN(rb, txn);

                        MemoryContextSwitchTo(ecxt);

                        PG_RE_THROW();

                }

        }

        PG_END_TRY();

```



Along with this fix, we have also added a TAP test to verify in patch.





Additional Suggestion:

Currently in `PG_CATCH` block, `specinsert` is only freed in the `ERRCODE_TRANSACTION_ROLLBACK` branch

for streaming or prepared transactions, via `ReorderBufferResetTXN()` at line 2691.

Would it make sense to move the freeing of `specinsert` before the if/else branch,

so that it is always freed regardless of the error path? This would avoid duplication and ensure

that `specinsert` is always cleaned up.





Regards,

Vishal Prasanna

Zoho Corporation

Attachments:

  [application/octet-stream] 0001-Fix-specinsert-leak-in-ReorderBufferProcessTXN-error.patch (2.6K, 3-0001-Fix-specinsert-leak-in-ReorderBufferProcessTXN-error.patch)
  download | inline diff:
From a365dfb595f60750776538dcb530a0eb1fd60e98 Mon Sep 17 00:00:00 2001
From: Vishal Prasanna <[email protected]>
Date: Wed, 18 Feb 2026 15:55:35 +0530
Subject: [PATCH] Fix specinsert leak in ReorderBufferProcessTXN error path

---
 .../replication/logical/reorderbuffer.c       |  6 +++
 src/test/subscription/t/100_bugs.pl           | 38 +++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 4bd1f7af061..eafec433d79 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -2694,6 +2694,12 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
 		}
 		else
 		{
+			/* Return the specinsert change before cleaning up the transaction */
+			if (specinsert != NULL)
+			{
+				ReorderBufferReturnChange(rb, specinsert, true);
+				specinsert = NULL;
+			}
 			ReorderBufferCleanupTXN(rb, txn);
 			MemoryContextSwitchTo(ecxt);
 			PG_RE_THROW();
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index 17accd11d93..73dca25f0e5 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -597,4 +597,42 @@ $node_publisher->safe_psql('postgres', "DROP DATABASE regress_db");
 
 $node_publisher->stop('fast');
 
+# Ensure logical decoder doesn't crash if error occurs
+# while processing an INSERT ... ON CONFLICT statement
+$node_publisher = PostgreSQL::Test::Cluster->new('logical_decoder');
+$node_publisher->init(allows_streaming => 'logical');
+$node_publisher->start;
+
+$node_publisher->safe_psql(
+	'postgres', qq(
+	CREATE TABLE tab_upsert (a INT PRIMARY KEY, b INT);
+	SELECT * FROM pg_create_logical_replication_slot('upsert_slot', 'pgoutput');
+	INSERT INTO tab_upsert (a, b) VALUES (1, 1)
+		ON CONFLICT(a) DO UPDATE SET b = excluded.b;
+));
+
+# Decode the changes without a publication and
+# verify that the logical decoder doesn't crash.
+($ret, $stdout, $stderr) = $node_publisher->psql(
+	'postgres', qq(
+	SELECT *
+	FROM pg_logical_slot_peek_binary_changes(
+		'upsert_slot',
+		NULL,
+		NULL,
+		'proto_version', '1',
+		'publication_names', 'pub_that_does_not_exist'
+	);
+));
+
+ok( $stderr =~ qr/publication "pub_that_does_not_exist" does not exist/,
+	'peek logical changes with non-existent publication throws error'
+);
+
+# Clean up
+$node_publisher->safe_psql('postgres', "SELECT pg_drop_replication_slot('upsert_slot')");
+$node_publisher->safe_psql('postgres', "DROP TABLE tab_upsert");
+
+$node_publisher->stop('fast');
+
 done_testing();
-- 
2.50.1 (Apple Git-155)



reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected], [email protected]
  Subject: Re: [BUG] Assert failure in ReorderBufferReturnTXN during logical decoding due to leaked specinsert change
  In-Reply-To: <[email protected]>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox