public inbox for [email protected]
help / color / mirror / Atom feedFrom: SATYANARAYANA NARLAPURAM <[email protected]>
To: PostgreSQL Hackers <[email protected]>
Subject: Make pg_prewarm, autoprewarm yield for waiting DDL
Date: Wed, 25 Mar 2026 14:32:23 -0700
Message-ID: <CAHg+QDfdoR=7iqEAvLW9qtzV0Sx1wp2FuALeamqcCdiVEmMF-Q@mail.gmail.com> (raw)
Both pg_prewarm() and the autoprewarm background worker hold
AccessShareLock on the target relation for the entire duration of
prewarming. On large tables this can take a long time, which means
that any DDL that needs a stronger lock (TRUNCATE, DROP TABLE, ALTER TABLE,
etc.) is blocked for the full duration.
VACUUM already solves this same problem during heap truncation: it
periodically calls LockHasWaitersRelation() and backs off when a
conflicting waiter is detected (see lazy_truncate_heap()).
The attached patch applies the same pattern to pg_prewarm and autoprewarm.
Every 1024 blocks, each code path checks for a waiter and if found then the
lock is released so that the DDL can proceed. Patch handles the relation
truncation, drop cases by emitting an error message. If the relation was
only partially truncated, the endpoint is adjusted downward and prewarming
continues.
When no DDL is waiting, the only overhead is one lock-table probe per 1024
blocks.
While developing this patch I discovered that LockHasWaiters() crashes with
a segfault when the lock in question was acquired via the fast-path
optimization, details in [1].
The patch includes a TAP test (t/002_lock_yield.pl) that exercises the
TRUNCATE and DROP TABLE scenarios using injection points.
Thanks,
Satya
[1]:
https://www.postgresql.org/message-id/CAHg%2BQDe_%3DZahnRx37bzrqYenKn_S5YDQ00fTfwe-ZUmjqO%3DqLg%40ma...
Attachments:
[application/octet-stream] 0001-pg_prewarm-yield-lock-for-ddl.patch (14.2K, 3-0001-pg_prewarm-yield-lock-for-ddl.patch)
download | inline diff:
diff --git a/contrib/pg_prewarm/Makefile b/contrib/pg_prewarm/Makefile
index 617ac8e0..53bce449 100644
--- a/contrib/pg_prewarm/Makefile
+++ b/contrib/pg_prewarm/Makefile
@@ -12,6 +12,9 @@ PGFILEDESC = "pg_prewarm - preload relation data into system buffer cache"
REGRESS = pg_prewarm
+EXTRA_INSTALL = src/test/modules/injection_points
+export enable_injection_points
+
TAP_TESTS = 1
ifdef USE_PGXS
diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c
index ba0bc8e6..2c207efa 100644
--- a/contrib/pg_prewarm/autoprewarm.c
+++ b/contrib/pg_prewarm/autoprewarm.c
@@ -39,12 +39,14 @@
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/latch.h"
+#include "storage/lmgr.h"
#include "storage/lwlock.h"
#include "storage/procsignal.h"
#include "storage/read_stream.h"
#include "storage/smgr.h"
#include "tcop/tcopprot.h"
#include "utils/guc.h"
+#include "utils/injection_point.h"
#include "utils/rel.h"
#include "utils/relfilenumbermap.h"
#include "utils/timestamp.h"
@@ -52,6 +54,11 @@
#define AUTOPREWARM_FILE "autoprewarm.blocks"
+/*
+ * Block interval for checking conflicting lock waiters during prewarming.
+ */
+#define PREWARM_WAITER_CHECK_INTERVAL 1024
+
/* Metadata for each block we dump. */
typedef struct BlockInfoRecord
{
@@ -635,13 +642,83 @@ autoprewarm_database_main(Datum main_arg)
* read stream callback will check that we still have free buffers
* before requesting each block from the read stream API.
*/
- while ((buf = read_stream_next_buffer(stream, NULL)) != InvalidBuffer)
{
- apw_state->prewarmed_blocks++;
- ReleaseBuffer(buf);
- }
+ int blocks_since_check = 0;
+ bool rel_dropped = false;
- read_stream_end(stream);
+ while ((buf = read_stream_next_buffer(stream, NULL)) != InvalidBuffer)
+ {
+ CHECK_FOR_INTERRUPTS();
+ apw_state->prewarmed_blocks++;
+ ReleaseBuffer(buf);
+ blocks_since_check++;
+
+ /*
+ * Periodically check for conflicting lock waiters. If
+ * found, end the current read stream and yield our lock
+ * so the waiter can proceed.
+ */
+ if (blocks_since_check >= PREWARM_WAITER_CHECK_INTERVAL)
+ {
+ blocks_since_check = 0;
+
+ INJECTION_POINT("autoprewarm-before-check-and-yield", NULL);
+
+ if (LockHasWaitersRelation(rel, AccessShareLock))
+ {
+ read_stream_end(stream);
+ relation_close(rel, AccessShareLock);
+
+ /* Reacquire and check if relation is still valid. */
+ rel = try_relation_open(reloid, AccessShareLock);
+ if (rel == NULL)
+ {
+ rel_dropped = true;
+ break;
+ }
+
+ /*
+ * Recalculate fork size; skip remainder if
+ * truncated.
+ */
+ if (!smgrexists(RelationGetSmgr(rel), forknum))
+ {
+ i = p.pos;
+ blk = block_info[i];
+ break;
+ }
+ p.nblocks = RelationGetNumberOfBlocksInFork(rel, forknum);
+
+ /* Restart stream for remaining blocks. */
+ stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
+ READ_STREAM_DEFAULT |
+ READ_STREAM_USE_BATCHING,
+ NULL,
+ rel,
+ p.forknum,
+ apw_read_stream_next_block,
+ &p,
+ 0);
+ }
+ }
+ }
+
+ if (rel_dropped)
+ {
+ /* Fast-forward past remaining blocks for this relation. */
+ for (; i < apw_state->prewarm_stop_idx; i++)
+ {
+ blk = block_info[i];
+ if (blk.tablespace != tablespace ||
+ blk.filenumber != filenumber)
+ break;
+ }
+ CommitTransactionCommand();
+ continue; /* outer while loop */
+ }
+
+ read_stream_end(stream);
+ }
/* Advance i past all the blocks just prewarmed. */
i = p.pos;
diff --git a/contrib/pg_prewarm/meson.build b/contrib/pg_prewarm/meson.build
index e70546a4..8cc66073 100644
--- a/contrib/pg_prewarm/meson.build
+++ b/contrib/pg_prewarm/meson.build
@@ -37,6 +37,7 @@ tests += {
'tap': {
'tests': [
't/001_basic.pl',
+ 't/002_lock_yield.pl',
],
},
}
diff --git a/contrib/pg_prewarm/pg_prewarm.c b/contrib/pg_prewarm/pg_prewarm.c
index c2716086..6e8f2d93 100644
--- a/contrib/pg_prewarm/pg_prewarm.c
+++ b/contrib/pg_prewarm/pg_prewarm.c
@@ -25,6 +25,7 @@
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/builtins.h"
+#include "utils/injection_point.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
@@ -44,6 +45,74 @@ typedef enum
static PGIOAlignedBlock blockbuffer;
+/*
+ * Block interval for checking conflicting lock waiters. Checking every
+ * block is too expensive because LockHasWaitersRelation performs a
+ * lock-table probe, but we don't want to check too infrequently either, to avoid
+ * long stalls when there are waiters.
+ */
+#define PREWARM_WAITER_CHECK_INTERVAL 1024
+
+/*
+ * Check whether any session is waiting for a lock that conflicts with
+ * AccessShareLock on the relation. If so, release the lock to let the
+ * waiter proceed, then try to reacquire it.
+ *
+ * Throws an error if the relation was dropped or truncated past 'next_block' while
+ * the lock was not held.
+ */
+static Relation
+pg_prewarm_check_and_yield(Relation rel, Oid relOid, Oid privOid,
+ ForkNumber forkNumber, int64 next_block,
+ int64 blocks_done, int64 *last_block)
+{
+ int64 nblocks;
+
+ INJECTION_POINT("pg_prewarm-before-check-and-yield", NULL);
+
+ /* Nothing to do if nobody is waiting for a conflicting lock. */
+ if (!LockHasWaitersRelation(rel, AccessShareLock))
+ return rel;
+
+ /* Release all locks to let the waiter proceed. */
+ relation_close(rel, AccessShareLock);
+ if (privOid != relOid)
+ UnlockRelationOid(privOid, AccessShareLock);
+
+ /* Reacquire in the correct order: parent table before index. */
+ if (privOid != relOid)
+ LockRelationOid(privOid, AccessShareLock);
+ rel = try_relation_open(relOid, AccessShareLock);
+ if (rel == NULL)
+ {
+ if (privOid != relOid)
+ UnlockRelationOid(privOid, AccessShareLock);
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("relation was dropped during pg_prewarm after %" PRId64 " blocks",
+ blocks_done)));
+ }
+
+ /* Check if the fork still exists and has enough blocks. */
+ if (!smgrexists(RelationGetSmgr(rel), forkNumber) ||
+ (nblocks = RelationGetNumberOfBlocksInFork(rel, forkNumber)) <= next_block)
+ {
+ relation_close(rel, AccessShareLock);
+ if (privOid != relOid)
+ UnlockRelationOid(privOid, AccessShareLock);
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("relation was truncated during pg_prewarm after %" PRId64 " blocks",
+ blocks_done)));
+ }
+
+ /* Adjust endpoint if the relation was partially truncated. */
+ if (*last_block >= nblocks)
+ *last_block = nblocks - 1;
+
+ return rel;
+}
+
/*
* pg_prewarm(regclass, mode text, fork text,
* first_block int8, last_block int8)
@@ -208,6 +277,11 @@ pg_prewarm(PG_FUNCTION_ARGS)
for (block = first_block; block <= last_block; ++block)
{
CHECK_FOR_INTERRUPTS();
+ if (blocks_done > 0 &&
+ blocks_done % PREWARM_WAITER_CHECK_INTERVAL == 0)
+ rel = pg_prewarm_check_and_yield(rel, relOid, privOid,
+ forkNumber, block,
+ blocks_done, &last_block);
PrefetchBuffer(rel, forkNumber, block);
++blocks_done;
}
@@ -227,6 +301,11 @@ pg_prewarm(PG_FUNCTION_ARGS)
for (block = first_block; block <= last_block; ++block)
{
CHECK_FOR_INTERRUPTS();
+ if (blocks_done > 0 &&
+ blocks_done % PREWARM_WAITER_CHECK_INTERVAL == 0)
+ rel = pg_prewarm_check_and_yield(rel, relOid, privOid,
+ forkNumber, block,
+ blocks_done, &last_block);
smgrread(RelationGetSmgr(rel), forkNumber, block, blockbuffer.data);
++blocks_done;
}
@@ -266,6 +345,33 @@ pg_prewarm(PG_FUNCTION_ARGS)
buf = read_stream_next_buffer(stream, NULL);
ReleaseBuffer(buf);
++blocks_done;
+
+ /*
+ * Periodically check for conflicting lock waiters. If found, end
+ * the current read stream and yield, then start a new stream for
+ * the remaining blocks.
+ */
+ if (blocks_done % PREWARM_WAITER_CHECK_INTERVAL == 0 &&
+ block < last_block)
+ {
+ read_stream_end(stream);
+ rel = pg_prewarm_check_and_yield(rel, relOid, privOid,
+ forkNumber, block + 1,
+ blocks_done, &last_block);
+
+ /* Restart stream for remaining blocks. */
+ p.current_blocknum = block + 1;
+ p.last_exclusive = last_block + 1;
+ stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
+ READ_STREAM_FULL |
+ READ_STREAM_USE_BATCHING,
+ NULL,
+ rel,
+ forkNumber,
+ block_range_read_stream_cb,
+ &p,
+ 0);
+ }
}
Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
read_stream_end(stream);
diff --git a/contrib/pg_prewarm/t/002_lock_yield.pl b/contrib/pg_prewarm/t/002_lock_yield.pl
new file mode 100644
index 00000000..b5af7c08
--- /dev/null
+++ b/contrib/pg_prewarm/t/002_lock_yield.pl
@@ -0,0 +1,152 @@
+# Copyright (c) 2025-2026, PostgreSQL Global Development Group
+#
+# Test that pg_prewarm yields its AccessShareLock when a conflicting
+# DDL operation (TRUNCATE, DROP TABLE) is waiting, and that pg_prewarm
+# reports an appropriate error afterwards.
+
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+if ($ENV{enable_injection_points} ne 'yes')
+{
+ plan skip_all => 'Injection points not supported by this build';
+}
+
+my $node = PostgreSQL::Test::Cluster->new('main');
+$node->init;
+$node->start;
+
+if (!$node->check_extension('injection_points'))
+{
+ plan skip_all => 'Extension injection_points not installed';
+}
+
+$node->safe_psql('postgres', q(
+ CREATE EXTENSION pg_prewarm;
+ CREATE EXTENSION injection_points;
+));
+
+# Create tables large enough to trigger the waiter check
+# (> PREWARM_WAITER_CHECK_INTERVAL = 1024 blocks).
+$node->safe_psql('postgres', q(
+ CREATE TABLE trunc_test AS
+ SELECT i, repeat('x', 200) AS padding
+ FROM generate_series(1, 50000) i;
+ CREATE TABLE drop_test AS
+ SELECT i, repeat('x', 200) AS padding
+ FROM generate_series(1, 50000) i;
+));
+
+my $nblocks = $node->safe_psql('postgres',
+ "SELECT pg_relation_size('trunc_test') / current_setting('block_size')::int");
+ok($nblocks > 1024, "trunc_test has more than 1024 blocks ($nblocks)");
+
+# ------------------------------------------------------------------
+# Test 1: Normal pg_prewarm on a large table with no conflicting waiters.
+# ------------------------------------------------------------------
+my $result = $node->safe_psql('postgres',
+ "SELECT pg_prewarm('trunc_test', 'buffer')");
+like($result, qr/^[1-9][0-9]*$/, 'pg_prewarm buffer mode succeeds normally');
+
+# ------------------------------------------------------------------
+# Test 2: TRUNCATE proceeds during pg_prewarm.
+#
+# pg_prewarm pauses at the injection point while holding AccessShareLock.
+# TRUNCATE blocks waiting for AccessExclusiveLock. After we wake up
+# pg_prewarm it detects the waiter, yields, and TRUNCATE completes.
+# pg_prewarm then errors because the relation was truncated.
+# ------------------------------------------------------------------
+$node->safe_psql('postgres',
+ "SELECT injection_points_attach('pg_prewarm-before-check-and-yield', 'wait')");
+
+my $prewarm = $node->background_psql('postgres', on_error_stop => 0);
+$prewarm->query_until(qr/starting_prewarm/, q(
+ \echo starting_prewarm
+ SELECT pg_prewarm('trunc_test', 'buffer');
+));
+
+# Wait for pg_prewarm to hit the injection point.
+$node->poll_query_until('postgres', q(
+ SELECT count(*) > 0 FROM pg_stat_activity
+ WHERE wait_event = 'pg_prewarm-before-check-and-yield';
+), 't');
+
+# Start TRUNCATE in a second session — it will block on the lock.
+my $truncate = $node->background_psql('postgres');
+$truncate->query_until(qr/starting_truncate/, q(
+ \echo starting_truncate
+ TRUNCATE trunc_test;
+));
+
+# Confirm TRUNCATE is waiting for a lock.
+$node->poll_query_until('postgres', q(
+ SELECT count(*) > 0 FROM pg_stat_activity
+ WHERE query LIKE '%TRUNCATE trunc_test%'
+ AND wait_event_type = 'Lock';
+), 't');
+
+# Detach the injection point so pg_prewarm won't pause again at the
+# next check interval, then wake it up.
+$node->safe_psql('postgres',
+ "SELECT injection_points_detach('pg_prewarm-before-check-and-yield')");
+$node->safe_psql('postgres',
+ "SELECT injection_points_wakeup('pg_prewarm-before-check-and-yield')");
+
+# TRUNCATE should now complete because pg_prewarm yielded its lock.
+$truncate->quit;
+pass('TRUNCATE completed during pg_prewarm');
+
+# pg_prewarm should have reported that the relation was truncated.
+$prewarm->quit;
+like($prewarm->{stderr},
+ qr/relation was truncated during pg_prewarm/,
+ 'pg_prewarm reports truncation after TRUNCATE');
+
+# ------------------------------------------------------------------
+# Test 3: DROP TABLE proceeds during pg_prewarm.
+# ------------------------------------------------------------------
+$node->safe_psql('postgres',
+ "SELECT injection_points_attach('pg_prewarm-before-check-and-yield', 'wait')");
+
+my $prewarm2 = $node->background_psql('postgres', on_error_stop => 0);
+$prewarm2->query_until(qr/starting_prewarm/, q(
+ \echo starting_prewarm
+ SELECT pg_prewarm('drop_test', 'buffer');
+));
+
+$node->poll_query_until('postgres', q(
+ SELECT count(*) > 0 FROM pg_stat_activity
+ WHERE wait_event = 'pg_prewarm-before-check-and-yield';
+), 't');
+
+my $drop = $node->background_psql('postgres');
+$drop->query_until(qr/starting_drop/, q(
+ \echo starting_drop
+ DROP TABLE drop_test;
+));
+
+$node->poll_query_until('postgres', q(
+ SELECT count(*) > 0 FROM pg_stat_activity
+ WHERE query LIKE '%DROP TABLE drop_test%'
+ AND wait_event_type = 'Lock';
+), 't');
+
+$node->safe_psql('postgres',
+ "SELECT injection_points_detach('pg_prewarm-before-check-and-yield')");
+$node->safe_psql('postgres',
+ "SELECT injection_points_wakeup('pg_prewarm-before-check-and-yield')");
+
+$drop->quit;
+pass('DROP TABLE completed during pg_prewarm');
+
+$prewarm2->quit;
+like($prewarm2->{stderr},
+ qr/relation was dropped during pg_prewarm/,
+ 'pg_prewarm reports drop after DROP TABLE');
+
+$node->stop;
+done_testing();
view thread (2+ messages) latest in thread
reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Reply to all the recipients using the --to and --cc options:
reply via email
To: [email protected]
Cc: [email protected], [email protected]
Subject: Re: Make pg_prewarm, autoprewarm yield for waiting DDL
In-Reply-To: <CAHg+QDfdoR=7iqEAvLW9qtzV0Sx1wp2FuALeamqcCdiVEmMF-Q@mail.gmail.com>
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox