public inbox for [email protected]  
help / color / mirror / Atom feed
From: Christoph Berg <[email protected]>
To: Bertrand Drouvot <[email protected]>
Cc: Tomas Vondra <[email protected]>
Cc: Andres Freund <[email protected]>
Cc: Tomas Vondra <[email protected]>
Cc: [email protected]
Subject: Re: pgsql: Introduce pg_shmem_allocations_numa view
Date: Tue, 24 Jun 2025 20:24:22 +0200
Message-ID: <[email protected]> (raw)
In-Reply-To: <aFqnM/[email protected]>
References: <[email protected]>
	<[email protected]>
	<[email protected]>
	<[email protected]>
	<[email protected]>
	<[email protected]>
	<[email protected]>
	<aFqHoNXQ/[email protected]>
	<[email protected]>
	<aFqnM/[email protected]>

Re: Bertrand Drouvot
> Yes, something like:
> 
> diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
> index c9ae3b45b76..070ad2f13e7 100644
> --- a/src/backend/storage/ipc/shmem.c
> +++ b/src/backend/storage/ipc/shmem.c
> @@ -689,8 +689,17 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
>                         CHECK_FOR_INTERRUPTS();
>                 }
> 
> -               if (pg_numa_query_pages(0, shm_ent_page_count, page_ptrs, pages_status) == -1)
> -                       elog(ERROR, "failed NUMA pages inquiry status: %m");
> +               #define NUMA_QUERY_CHUNK_SIZE 16  /* has to be <= DO_PAGES_STAT_CHUNK_NR (do_pages_stat())*/
> +
> +               for (uint64 chunk_start = 0; chunk_start < shm_ent_page_count; chunk_start += NUMA_QUERY_CHUNK_SIZE) {
> +                        uint64 chunk_size = Min(NUMA_QUERY_CHUNK_SIZE, shm_ent_page_count - chunk_start);
> +
> +                       if (pg_numa_query_pages(0, chunk_size, &page_ptrs[chunk_start],
> +                                                                       &pages_status[chunk_start]) == -1)
> +                               elog(ERROR, "failed NUMA pages inquiry status: %m");
> +               }
> +
> +               #undef NUMA_QUERY_CHUNK_SIZE

I uploaded a variant of this patch to Debian and it seems to have fixed the issue:

https://buildd.debian.org/status/package.php?p=postgresql-18&suite=experimental

(No reply from linux-mm yet.)

Christoph

Work around a Linux bug in move_pages

In 32-bit mode on 64-bit kernels, move_pages() does not correctly advance to
the next chunk. Work around this by not asking for more than 16 pages at once,
so that move_pages()'s internal loop is not executed more than once.

https://www.postgresql.org/message-id/flat/a3a4fe3d-1a80-4e03-aa8e-150ee15f6c35%40vondra.me#6abe7eaa802b5b07bb70cc3229e63a9f
https://marc.info/?l=linux-mm&m=175077821909222&w=2

--- a/contrib/pg_buffercache/pg_buffercache_pages.c
+++ b/contrib/pg_buffercache/pg_buffercache_pages.c
@@ -390,8 +390,15 @@ pg_buffercache_numa_pages(PG_FUNCTION_AR
 		memset(os_page_status, 0xff, sizeof(int) * os_page_count);
 
 		/* Query NUMA status for all the pointers */
-		if (pg_numa_query_pages(0, os_page_count, os_page_ptrs, os_page_status) == -1)
-			elog(ERROR, "failed NUMA pages inquiry: %m");
+#define NUMA_QUERY_CHUNK_SIZE 16  /* has to be <= DO_PAGES_STAT_CHUNK_NR (do_pages_stat())*/
+		for (uint64 chunk_start = 0; chunk_start < os_page_count; chunk_start += NUMA_QUERY_CHUNK_SIZE) {
+			uint64 chunk_size = Min(NUMA_QUERY_CHUNK_SIZE, os_page_count - chunk_start);
+
+			if (pg_numa_query_pages(0, chunk_size, &os_page_ptrs[chunk_start],
+						&os_page_status[chunk_start]) == -1)
+				elog(ERROR, "failed NUMA pages inquiry status: %m");
+		}
+#undef NUMA_QUERY_CHUNK_SIZE
 
 		/* Initialize the multi-call context, load entries about buffers */
 
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -689,8 +689,15 @@ pg_get_shmem_allocations_numa(PG_FUNCTIO
 			CHECK_FOR_INTERRUPTS();
 		}
 
-		if (pg_numa_query_pages(0, shm_ent_page_count, page_ptrs, pages_status) == -1)
-			elog(ERROR, "failed NUMA pages inquiry status: %m");
+#define NUMA_QUERY_CHUNK_SIZE 16  /* has to be <= DO_PAGES_STAT_CHUNK_NR (do_pages_stat())*/
+		for (uint64 chunk_start = 0; chunk_start < shm_ent_page_count; chunk_start += NUMA_QUERY_CHUNK_SIZE) {
+			uint64 chunk_size = Min(NUMA_QUERY_CHUNK_SIZE, shm_ent_page_count - chunk_start);
+
+			if (pg_numa_query_pages(0, chunk_size, &page_ptrs[chunk_start],
+						&pages_status[chunk_start]) == -1)
+				elog(ERROR, "failed NUMA pages inquiry status: %m");
+		}
+#undef NUMA_QUERY_CHUNK_SIZE
 
 		/* Count number of NUMA nodes used for this shared memory entry */
 		memset(nodes, 0, sizeof(Size) * (max_nodes + 1));


Attachments:

  [text/plain] move-pages32 (2.3K, 2-move-pages32)
  download | inline diff:
Work around a Linux bug in move_pages

In 32-bit mode on 64-bit kernels, move_pages() does not correctly advance to
the next chunk. Work around this by not asking for more than 16 pages at once,
so that move_pages()'s internal loop is not executed more than once.

https://www.postgresql.org/message-id/flat/a3a4fe3d-1a80-4e03-aa8e-150ee15f6c35%40vondra.me#6abe7eaa802b5b07bb70cc3229e63a9f
https://marc.info/?l=linux-mm&m=175077821909222&w=2

--- a/contrib/pg_buffercache/pg_buffercache_pages.c
+++ b/contrib/pg_buffercache/pg_buffercache_pages.c
@@ -390,8 +390,15 @@ pg_buffercache_numa_pages(PG_FUNCTION_AR
 		memset(os_page_status, 0xff, sizeof(int) * os_page_count);
 
 		/* Query NUMA status for all the pointers */
-		if (pg_numa_query_pages(0, os_page_count, os_page_ptrs, os_page_status) == -1)
-			elog(ERROR, "failed NUMA pages inquiry: %m");
+#define NUMA_QUERY_CHUNK_SIZE 16  /* has to be <= DO_PAGES_STAT_CHUNK_NR (do_pages_stat())*/
+		for (uint64 chunk_start = 0; chunk_start < os_page_count; chunk_start += NUMA_QUERY_CHUNK_SIZE) {
+			uint64 chunk_size = Min(NUMA_QUERY_CHUNK_SIZE, os_page_count - chunk_start);
+
+			if (pg_numa_query_pages(0, chunk_size, &os_page_ptrs[chunk_start],
+						&os_page_status[chunk_start]) == -1)
+				elog(ERROR, "failed NUMA pages inquiry status: %m");
+		}
+#undef NUMA_QUERY_CHUNK_SIZE
 
 		/* Initialize the multi-call context, load entries about buffers */
 
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -689,8 +689,15 @@ pg_get_shmem_allocations_numa(PG_FUNCTIO
 			CHECK_FOR_INTERRUPTS();
 		}
 
-		if (pg_numa_query_pages(0, shm_ent_page_count, page_ptrs, pages_status) == -1)
-			elog(ERROR, "failed NUMA pages inquiry status: %m");
+#define NUMA_QUERY_CHUNK_SIZE 16  /* has to be <= DO_PAGES_STAT_CHUNK_NR (do_pages_stat())*/
+		for (uint64 chunk_start = 0; chunk_start < shm_ent_page_count; chunk_start += NUMA_QUERY_CHUNK_SIZE) {
+			uint64 chunk_size = Min(NUMA_QUERY_CHUNK_SIZE, shm_ent_page_count - chunk_start);
+
+			if (pg_numa_query_pages(0, chunk_size, &page_ptrs[chunk_start],
+						&pages_status[chunk_start]) == -1)
+				elog(ERROR, "failed NUMA pages inquiry status: %m");
+		}
+#undef NUMA_QUERY_CHUNK_SIZE
 
 		/* Count number of NUMA nodes used for this shared memory entry */
 		memset(nodes, 0, sizeof(Size) * (max_nodes + 1));


view thread (83+ messages)  latest in thread

reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected], [email protected], [email protected], [email protected], [email protected], [email protected]
  Subject: Re: pgsql: Introduce pg_shmem_allocations_numa view
  In-Reply-To: <[email protected]>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox