public inbox for [email protected]
help / color / mirror / Atom feedFrom: Christoph Berg <[email protected]>
Subject: [PATCH] Don't force-allocate pages for pg_get_shmem_allocations_numa
Date: Mon, 23 Jun 2025 16:37:17 +0200
We tried to force all shared memory pages to be allocated on the first
call to pg_get_shmem_allocations_numa (and pg_buffercache_numa_pages),
but on 32-bit architectures that still left pages with status "EFAULT"
behind.
Instead of forcing the system into some state, just report that state
from these functions.
---
contrib/pg_buffercache/pg_buffercache_pages.c | 21 ----------------
src/backend/storage/ipc/shmem.c | 25 ++-----------------
src/include/port/pg_numa.h | 16 ------------
3 files changed, 2 insertions(+), 60 deletions(-)
diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c
index 4b007f6e1b0..be927176f26 100644
--- a/contrib/pg_buffercache/pg_buffercache_pages.c
+++ b/contrib/pg_buffercache/pg_buffercache_pages.c
@@ -102,10 +102,6 @@ PG_FUNCTION_INFO_V1(pg_buffercache_evict_relation);
PG_FUNCTION_INFO_V1(pg_buffercache_evict_all);
-/* Only need to touch memory once per backend process lifetime */
-static bool firstNumaTouch = true;
-
-
Datum
pg_buffercache_pages(PG_FUNCTION_ARGS)
{
@@ -294,10 +290,6 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
*
* We expect both sizes (for buffers and memory pages) to be a power-of-2, so
* one is always a multiple of the other.
- *
- * In order to get reliable results we also need to touch memory pages, so
- * that the inquiry about NUMA memory node doesn't return -2 (which indicates
- * unmapped/unallocated pages).
*/
Datum
pg_buffercache_numa_pages(PG_FUNCTION_ARGS)
@@ -320,7 +312,6 @@ pg_buffercache_numa_pages(PG_FUNCTION_ARGS)
uint64 os_page_count;
int pages_per_buffer;
int max_entries;
- volatile uint64 touch pg_attribute_unused();
char *startptr,
*endptr;
@@ -370,14 +361,8 @@ pg_buffercache_numa_pages(PG_FUNCTION_ARGS)
/* Fill pointers for all the memory pages. */
idx = 0;
for (char *ptr = startptr; ptr < endptr; ptr += os_page_size)
- {
os_page_ptrs[idx++] = ptr;
- /* Only need to touch memory once per backend process lifetime */
- if (firstNumaTouch)
- pg_numa_touch_mem_if_required(touch, ptr);
- }
-
Assert(idx == os_page_count);
elog(DEBUG1, "NUMA: NBuffers=%d os_page_count=" UINT64_FORMAT " "
@@ -438,9 +423,6 @@ pg_buffercache_numa_pages(PG_FUNCTION_ARGS)
/* Return to original context when allocating transient memory */
MemoryContextSwitchTo(oldcontext);
- if (firstNumaTouch)
- elog(DEBUG1, "NUMA: page-faulting the buffercache for proper NUMA readouts");
-
/*
* Scan through all the buffers, saving the relevant fields in the
* fctx->record structure.
@@ -503,9 +485,6 @@ pg_buffercache_numa_pages(PG_FUNCTION_ARGS)
/* Set max calls and remember the user function context. */
funcctx->max_calls = idx;
funcctx->user_fctx = fctx;
-
- /* Remember this backend touched the pages */
- firstNumaTouch = false;
}
funcctx = SRF_PERCALL_SETUP();
diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index c9ae3b45b76..1e02d97e61f 100644
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -90,9 +90,6 @@ slock_t *ShmemLock; /* spinlock for shared memory and LWLock
static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */
-/* To get reliable results for NUMA inquiry we need to "touch pages" once */
-static bool firstNumaTouch = true;
-
Datum pg_numa_available(PG_FUNCTION_ARGS);
/*
@@ -634,9 +631,6 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
page_ptrs = palloc0(sizeof(void *) * shm_total_page_count);
pages_status = palloc(sizeof(int) * shm_total_page_count);
- if (firstNumaTouch)
- elog(DEBUG1, "NUMA: page-faulting shared memory segments for proper NUMA readouts");
-
LWLockAcquire(ShmemIndexLock, LW_SHARED);
hash_seq_init(&hstat, ShmemIndex);
@@ -672,21 +666,12 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
/*
* Setup page_ptrs[] with pointers to all OS pages for this segment,
* and get the NUMA status using pg_numa_query_pages.
- *
- * In order to get reliable results we also need to touch memory
- * pages, so that inquiry about NUMA memory node doesn't return -2
- * (ENOENT, which indicates unmapped/unallocated pages).
*/
for (i = 0; i < shm_ent_page_count; i++)
{
volatile uint64 touch pg_attribute_unused();
page_ptrs[i] = startptr + (i * os_page_size);
-
- if (firstNumaTouch)
- pg_numa_touch_mem_if_required(touch, page_ptrs[i]);
-
- CHECK_FOR_INTERRUPTS();
}
if (pg_numa_query_pages(0, shm_ent_page_count, page_ptrs, pages_status) == -1)
@@ -700,13 +685,8 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
int s = pages_status[i];
/* Ensure we are adding only valid index to the array */
- if (s < 0 || s > max_nodes)
- {
- elog(ERROR, "invalid NUMA node id outside of allowed range "
- "[0, " UINT64_FORMAT "]: %d", max_nodes, s);
- }
-
- nodes[s]++;
+ if (s >= 0 && s <= max_nodes)
+ nodes[s]++;
}
/*
@@ -725,7 +705,6 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
}
LWLockRelease(ShmemIndexLock);
- firstNumaTouch = false;
return (Datum) 0;
}
diff --git a/src/include/port/pg_numa.h b/src/include/port/pg_numa.h
index 40f1d324dcf..bebc4203184 100644
--- a/src/include/port/pg_numa.h
+++ b/src/include/port/pg_numa.h
@@ -18,20 +18,4 @@ extern PGDLLIMPORT int pg_numa_init(void);
extern PGDLLIMPORT int pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status);
extern PGDLLIMPORT int pg_numa_get_max_node(void);
-#ifdef USE_LIBNUMA
-
-/*
- * This is required on Linux, before pg_numa_query_pages() as we
- * need to page-fault before move_pages(2) syscall returns valid results.
- */
-#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \
- ro_volatile_var = *(volatile uint64 *) ptr
-
-#else
-
-#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \
- do {} while(0)
-
-#endif
-
#endif /* PG_NUMA_H */
--
2.47.2
--p321qshW2LpuyTKe--
reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Reply to all the recipients using the --to and --cc options:
reply via email
To: [email protected]
Cc: [email protected]
Subject: Re: [PATCH] Don't force-allocate pages for pg_get_shmem_allocations_numa
In-Reply-To: <no-message-id-382@localhost>
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox