From c4342f7117df0c7ab420155d8e2726a5f0a8dd6e Mon Sep 17 00:00:00 2001
From: spoondla
Date: Mon, 12 Jan 2026 15:29:48 -0800
Subject: [PATCH v2] Document correct kernel requirements for io_uring

Add a startup-time check for the io_uring operations PostgreSQL needs.

While io_uring was introduced in Linux 5.1, PostgreSQL requires kernel
version 5.6 or newer due to the io_uring operations it relies on
(IORING_OP_READ and IORING_OP_WRITE). Earlier kernels may appear to
support io_uring but can fail at runtime.

When io_method=io_uring is initialized, probe the kernel for the required
opcodes and report an error if any of them is missing. The probe uses
liburing's probe API (io_uring_get_probe_ring) when it is available and
falls back to a kernel version check otherwise.

Updated the internal AIO documentation and the sample configuration file
to state the correct minimum kernel requirement.
---
NOTE(review): this patch changes configure.ac only. The generated
configure script and src/include/pg_config.h.in still need to be
regenerated so that the new HAVE_IO_URING_* symbols exist in autoconf
builds.

 configure.ac                                  |  2 +-
 meson.build                                   | 10 ++
 src/backend/storage/aio/README.md             |  8 +-
 src/backend/storage/aio/method_io_uring.c     | 97 +++++++++++++++++++
 src/backend/utils/misc/postgresql.conf.sample |  2 +-
 5 files changed, 116 insertions(+), 3 deletions(-)

diff --git a/configure.ac b/configure.ac
index 145197e6bd6..f056966c25b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1417,7 +1417,7 @@ fi
 if test "$with_liburing" = yes; then
   _LIBS="$LIBS"
   LIBS="$LIBURING_LIBS $LIBS"
-  AC_CHECK_FUNCS([io_uring_queue_init_mem])
+  AC_CHECK_FUNCS([io_uring_queue_init_mem io_uring_get_probe_ring io_uring_free_probe])
   LIBS="$_LIBS"
 fi
 
diff --git a/meson.build b/meson.build
index 555c94796c6..c4a5271a18b 100644
--- a/meson.build
+++ b/meson.build
@@ -1040,6 +1040,16 @@ if liburing.found()
     cdata.set('HAVE_IO_URING_QUEUE_INIT_MEM', 1)
   endif
 
+  if cc.has_function('io_uring_get_probe_ring',
+      dependencies: liburing, args: test_c_args)
+    cdata.set('HAVE_IO_URING_GET_PROBE_RING', 1)
+  endif
+
+  if cc.has_function('io_uring_free_probe',
+      dependencies: liburing, args: test_c_args)
+    cdata.set('HAVE_IO_URING_FREE_PROBE', 1)
+  endif
+
 endif
 
 
diff --git a/src/backend/storage/aio/README.md b/src/backend/storage/aio/README.md
index 72ae3b3737d..c40a6ce16cf 100644
--- a/src/backend/storage/aio/README.md
+++ b/src/backend/storage/aio/README.md
@@ -256,10 +256,16 @@ synchronous manner.
 
 #### io_uring
 
-`io_method=io_uring` is available on Linux 5.1+. In contrast to worker mode it
+`io_method=io_uring` is available on Linux 5.6+. In contrast to worker mode it
 dispatches all IO from within the process, lowering context switch rate /
 latency.
 
+While io_uring was introduced in Linux kernel 5.1, the operations required by
+PostgreSQL (IORING_OP_READ and IORING_OP_WRITE opcodes for non-vectored I/O)
+are only available starting with Linux kernel 5.6. On kernels between 5.1 and
+5.5 these operations fail with EINVAL, so PostgreSQL checks for the required
+opcodes at startup and reports an error if `io_method=io_uring` cannot work.
+
 ### AIO Handles
 
 
diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c
index af58c6118ac..2853c92eb15 100644
--- a/src/backend/storage/aio/method_io_uring.c
+++ b/src/backend/storage/aio/method_io_uring.c
@@ -30,6 +30,7 @@
 #ifdef IOMETHOD_IO_URING_ENABLED
 
 #include <sys/mman.h>
+#include <sys/utsname.h>
 #include <unistd.h>
 
 #include <liburing.h>
@@ -225,6 +226,85 @@ pgaio_uring_check_capabilities(void)
 	pgaio_uring_caps.checked = true;
 }
 
+/*
+ * Fallback check based on the running kernel's version string.
+ *
+ * Returns true if uname() reports a 5.1 - 5.5 kernel, i.e. one that has
+ * io_uring but predates IORING_OP_READ / IORING_OP_WRITE. Returns false if
+ * the version is outside that range or cannot be determined. This is only
+ * a heuristic, as it cannot account for vendor backports.
+ */
+static bool
+kernel_version_lacks_uring_read_write(void)
+{
+	struct utsname uts;
+	int			major;
+	int			minor;
+
+	if (uname(&uts) != 0 ||
+		sscanf(uts.release, "%d.%d", &major, &minor) != 2)
+		return false;
+
+	/* io_uring exists since 5.1, IORING_OP_READ/WRITE since 5.6 */
+	return major == 5 && minor >= 1 && minor <= 5;
+}
+
+/*
+ * Check whether the kernel lacks an io_uring opcode that PostgreSQL needs.
+ *
+ * PostgreSQL submits both single-buffer (IORING_OP_READ, IORING_OP_WRITE)
+ * and vectored (IORING_OP_READV, IORING_OP_WRITEV) operations. The vectored
+ * opcodes have existed since io_uring was introduced in Linux 5.1, but the
+ * single-buffer ones were only added in 5.6.
+ *
+ * If liburing provides the probe API, we ask the kernel which opcodes it
+ * supports, which also gives the right answer for kernels with vendor
+ * backports. If liburing is too old for that, or no probe could be
+ * obtained (e.g. because the kernel predates probe support), we fall back
+ * to a check of the kernel version.
+ *
+ * Returns true if a required opcode is known to be unsupported. Returns
+ * false otherwise, including when no ring could be created at all; that
+ * failure is left to the regular initialization path to report.
+ */
+static bool
+is_uring_read_write_unsupported(void)
+{
+	struct io_uring test_ring;
+	bool		unsupported;
+
+	/* Create a temporary ring to probe capabilities */
+	if (io_uring_queue_init(2, &test_ring, 0) < 0)
+		return false;
+
+#if defined(HAVE_IO_URING_GET_PROBE_RING) && defined(HAVE_IO_URING_FREE_PROBE)
+	{
+		struct io_uring_probe *probe;
+
+		probe = io_uring_get_probe_ring(&test_ring);
+		if (probe != NULL)
+		{
+			unsupported =
+				!io_uring_opcode_supported(probe, IORING_OP_READ) ||
+				!io_uring_opcode_supported(probe, IORING_OP_WRITE) ||
+				!io_uring_opcode_supported(probe, IORING_OP_READV) ||
+				!io_uring_opcode_supported(probe, IORING_OP_WRITEV);
+			io_uring_free_probe(probe);
+		}
+		else
+		{
+			/* No probe available, use the version heuristic instead */
+			unsupported = kernel_version_lacks_uring_read_write();
+		}
+	}
+#else
+	unsupported = kernel_version_lacks_uring_read_write();
+#endif
+
+	io_uring_queue_exit(&test_ring);
+	return unsupported;
+}
+
 /*
  * Memory for all PgAioUringContext instances
  */
@@ -284,6 +364,23 @@ pgaio_uring_shmem_init(bool first_time)
 	size_t		ring_mem_remain = 0;
 	char	   *ring_mem_next = 0;
 
+	/*
+	 * Check if the kernel supports the required io_uring operations before
+	 * attempting full initialization. Kernels without all required opcodes
+	 * (IORING_OP_READ, WRITE, READV, WRITEV) will cause runtime EINVAL errors.
+	 */
+	if (is_uring_read_write_unsupported())
+	{
+		ereport(ERROR,
+				errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				errmsg("kernel does not support required io_uring operations"),
+				errdetail("The kernel supports io_uring but lacks one or more of the "
+						  "required opcodes (IORING_OP_READ, IORING_OP_WRITE, "
+						  "IORING_OP_READV, IORING_OP_WRITEV). "
+						  "This typically occurs on Linux kernels older than 5.6."),
+				errhint("Either upgrade your kernel to version 5.6 or newer, or use io_method=worker."));
+	}
+
 	/*
 	 * We allocate memory for all PgAioUringContext instances and, if
 	 * supported, the memory required for each of the io_uring instances, in
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index dc9e2255f8a..1648f4be207 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -204,7 +204,7 @@
 					# (change requires restart)
 #io_combine_limit = 128kB		# usually 1-128 blocks (depends on OS)
 
-#io_method = worker			# worker, io_uring, sync
+#io_method = worker			# worker, io_uring (Linux 5.6+), sync
 					# (change requires restart)
 #io_max_concurrency = -1		# Max number of IOs that one process
 					# can execute simultaneously
-- 
2.39.5 (Apple Git-154)