public inbox for [email protected]
help / color / mirror / Atom feedFrom: Andres Freund <[email protected]>
Subject: [PATCH v4 07/14] Hacky implementation of making read_stream_reset()/end() not wait for IO
Date: Tue, 31 Mar 2026 13:27:29 -0400
Not waiting for IO during read_stream_reset() can be important for performance
in cases where read streams are frequently reset before the end is
reached. Current users do not commonly do that, but the upcoming work to use a
read stream to prefetch table blocks as part of index scans can do so
frequently in some query patterns. E.g. if there is an index scan on the inner
side of a nested loop.
FIXME: This implementation is problematic though, as there is nothing forcing
the discarded IOs to ever be completed if the backend goes idle. That's at the
very least problematic because it leads to the underlying buffers continuing
to be pinned and the IOs showing up in the pg_aios view.
Author:
Reviewed-by:
Discussion: https://postgr.es/m/
Backpatch:
---
src/include/storage/aio.h | 1 +
src/backend/storage/aio/aio.c | 26 +++++++++++++++++++
src/backend/storage/aio/read_stream.c | 36 +++++++++++++++++++++------
3 files changed, 55 insertions(+), 8 deletions(-)
diff --git a/src/include/storage/aio.h b/src/include/storage/aio.h
index ec543b78409..c184e97a977 100644
--- a/src/include/storage/aio.h
+++ b/src/include/storage/aio.h
@@ -328,6 +328,7 @@ extern int pgaio_wref_get_id(PgAioWaitRef *iow);
extern void pgaio_wref_wait(PgAioWaitRef *iow);
extern bool pgaio_wref_check_done(PgAioWaitRef *iow);
+extern void pgaio_wref_discard_result(PgAioWaitRef *iow);
/* --------------------------------------------------------------------------------
diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c
index 8f7e26607b9..438d5911126 100644
--- a/src/backend/storage/aio/aio.c
+++ b/src/backend/storage/aio/aio.c
@@ -1050,6 +1050,32 @@ pgaio_wref_check_done(PgAioWaitRef *iow)
return false;
}
+void
+pgaio_wref_discard_result(PgAioWaitRef *iow)
+{
+ uint64 ref_generation;
+ bool am_owner;
+ PgAioHandle *ioh;
+ PgAioHandleState state;
+
+ ioh = pgaio_io_from_wref(iow, &ref_generation);
+
+ am_owner = ioh->owner_procno == MyProcNumber;
+
+ if (!am_owner)
+ elog(ERROR, "not you");
+
+ if (pgaio_io_was_recycled(ioh, ref_generation, &state))
+ return;
+
+ pgaio_debug_io(DEBUG2, ioh,
+ "discarding result %p",
+ ioh->report_return);
+
+ if (ioh->resowner)
+ pgaio_io_release_resowner(&ioh->resowner_node, false);
+}
+
/* --------------------------------------------------------------------------------
diff --git a/src/backend/storage/aio/read_stream.c b/src/backend/storage/aio/read_stream.c
index 9d22a119bef..397e6576689 100644
--- a/src/backend/storage/aio/read_stream.c
+++ b/src/backend/storage/aio/read_stream.c
@@ -467,7 +467,7 @@ read_stream_should_look_ahead(ReadStream *stream)
return false;
/* If the callback has signaled end-of-stream, we're done */
- if (stream->readahead_distance == 0)
+ if (stream->readahead_distance <= 0)
return false;
/* never pin more buffers than allowed */
@@ -522,7 +522,7 @@ read_stream_should_issue_now(ReadStream *stream)
* If the callback has signaled end-of-stream, start the pending read
* immediately. There is no further potential for IO combining.
*/
- if (stream->readahead_distance == 0)
+ if (stream->readahead_distance <= 0)
return true;
/*
@@ -635,7 +635,7 @@ read_stream_look_ahead(ReadStream *stream)
* stream. In the worst case we can always make progress one buffer at a
* time.
*/
- Assert(stream->pinned_buffers > 0 || stream->readahead_distance == 0);
+ Assert(stream->pinned_buffers > 0 || stream->readahead_distance <= 0);
if (stream->batch_mode)
pgaio_exit_batchmode();
@@ -1025,7 +1025,7 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
Assert(stream->oldest_buffer_index == stream->next_buffer_index);
/* End of stream reached? */
- if (stream->readahead_distance == 0)
+ if (stream->readahead_distance <= 0)
return InvalidBuffer;
/*
@@ -1039,7 +1039,7 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
/* End of stream reached? */
if (stream->pinned_buffers == 0)
{
- Assert(stream->readahead_distance == 0);
+ Assert(stream->readahead_distance <= 0);
return InvalidBuffer;
}
}
@@ -1066,7 +1066,27 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
Assert(stream->ios[io_index].op.buffers ==
&stream->buffers[oldest_buffer_index]);
- needed_wait = WaitReadBuffers(&stream->ios[io_index].op);
+ /*
+ * If the stream has been reset, don't even wait for the IO, just
+ * discard it.
+ */
+ if (stream->readahead_distance < 0)
+ {
+ if (pgaio_wref_valid(&stream->ios[io_index].op.io_wref) &&
+ !stream->ios[io_index].op.foreign_io)
+ {
+ pgaio_wref_discard_result(&stream->ios[io_index].op.io_wref);
+ pgaio_wref_clear(&stream->ios[io_index].op.io_wref);
+ }
+ else
+ WaitReadBuffers(&stream->ios[io_index].op);
+
+ needed_wait = false;
+ }
+ else
+ {
+ needed_wait = WaitReadBuffers(&stream->ios[io_index].op);
+ }
Assert(stream->ios_in_progress > 0);
stream->ios_in_progress--;
@@ -1297,8 +1317,8 @@ read_stream_reset(ReadStream *stream)
Buffer buffer;
/* Stop looking ahead. */
- stream->readahead_distance = 0;
- stream->combine_distance = 0;
+ stream->readahead_distance = -1;
+ stream->combine_distance = -1;
/* Forget buffered block number and fast path state. */
stream->buffered_blocknum = InvalidBlockNumber;
--
2.53.0.1.gb2826b52eb
--ik3l34cft6pr4f53--
view thread (2+ messages)
reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Reply to all the recipients using the --to and --cc options:
reply via email
To: [email protected]
Cc: [email protected]
Subject: Re: [PATCH v4 07/14] Hacky implementation of making read_stream_reset()/end() not wait for IO
In-Reply-To: <no-message-id-601364@localhost>
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox