public inbox for [email protected]
help / color / mirror / Atom feed
From: Bossart, Nathan <[email protected]>
To: Michael Paquier <[email protected]>
Cc: Justin Pryzby <[email protected]>
Cc: Andres Freund <[email protected]>
Cc: Magnus Hagander <[email protected]>
Cc: Mark Dilger <[email protected]>
Cc: Don Seiler <[email protected]>
Cc: [email protected] <[email protected]>
Subject: Re: Estimating HugePages Requirements?
Date: Tue, 31 Aug 2021 05:37:52 +0000
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
References: <[email protected]>
<[email protected]>
<CABUevEwGHcsmp4rgZdcbinZOdEs_cSCabihDvRty=0zz1H95kw@mail.gmail.com>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
On 8/30/21, 12:29 AM, "Michael Paquier" <[email protected]> wrote:
> Attached is a WIP to show how the order of the operations could be
> changed, as that's easier to grasp. Even if we don't do that, having
> the GUC and the refactoring of CalculateShmemSize() would still be
> useful, as one could just query an existing instance for an estimation
> of huge pages for a cloned one.
>
> The GUC shared_memory_size should have GUC_NOT_IN_SAMPLE and
> GUC_DISALLOW_IN_FILE, with some documentation, of course. I added the
> flags to the GUC, not the docs. The code setting up the GUC is not
> good either. It would make sense to just have that in a small wrapper
> of ipci.c, perhaps.
I moved the GUC calculation to ipci.c, adjusted the docs, and added a
huge_pages_required GUC. It's still a little rough around the edges,
and I haven't tested it on Windows, but this seems like the direction
the patch is headed.
Nathan
Attachments:
[application/octet-stream] v3-0001-Move-the-shared-memory-size-calculation-to-its-ow.patch (7.1K, 2-v3-0001-Move-the-shared-memory-size-calculation-to-its-ow.patch)
download | inline diff:
From 68a6974c73ef512ceb8e35649bf0add4b3547fa3 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <[email protected]>
Date: Fri, 27 Aug 2021 20:03:01 +0000
Subject: [PATCH v3 1/2] Move the shared memory size calculation to its own
function.
This change refactors the shared memory size calculation in
CreateSharedMemoryAndSemaphores() to its own function. This is
intended for use in a future change that will simplify the steps
for setting up huge pages.
---
src/backend/storage/ipc/ipci.c | 142 ++++++++++++++++++++++++-----------------
src/include/storage/ipc.h | 1 +
2 files changed, 84 insertions(+), 59 deletions(-)
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 3e4ec53a97..b225b1ee70 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -75,6 +75,87 @@ RequestAddinShmemSpace(Size size)
total_addin_request = add_size(total_addin_request, size);
}
+/*
+ * CalculateShmemSize
+ * Returns the estimated size, in bytes, of the main shared memory segment.
+ *
+ * If num_semaphores is not NULL, *num_semaphores is set to the number of
+ * semaphores required.
+ *
+ * Side effect: clears addin_request_allowed, freezing the additional shared
+ * memory request size from loadable modules.
+ */
+Size
+CalculateShmemSize(int *num_semaphores)
+{
+ Size size;
+ int numSemas;
+
+ /* Compute number of semaphores we'll need */
+ numSemas = ProcGlobalSemas();
+ numSemas += SpinlockSemas();
+
+ /* Return the number of semaphores if requested by the caller */
+ if (num_semaphores)
+ *num_semaphores = numSemas;
+
+ /*
+ * Size of the Postgres shared-memory block is estimated via moderately-
+ * accurate estimates for the big hogs, plus 100K for the stuff that's too
+ * small to bother with estimating.
+ *
+ * We take some care to ensure that the total size request doesn't overflow
+ * size_t. If this gets through, we don't need to be so careful during the
+ * actual allocation phase.
+ */
+ size = 100000;
+ size = add_size(size, PGSemaphoreShmemSize(numSemas));
+ size = add_size(size, SpinlockSemaSize());
+ size = add_size(size, hash_estimate_size(SHMEM_INDEX_SIZE,
+ sizeof(ShmemIndexEnt)));
+ size = add_size(size, dsm_estimate_size());
+ size = add_size(size, BufferShmemSize());
+ size = add_size(size, LockShmemSize());
+ size = add_size(size, PredicateLockShmemSize());
+ size = add_size(size, ProcGlobalShmemSize());
+ size = add_size(size, XLOGShmemSize());
+ size = add_size(size, CLOGShmemSize());
+ size = add_size(size, CommitTsShmemSize());
+ size = add_size(size, SUBTRANSShmemSize());
+ size = add_size(size, TwoPhaseShmemSize());
+ size = add_size(size, BackgroundWorkerShmemSize());
+ size = add_size(size, MultiXactShmemSize());
+ size = add_size(size, LWLockShmemSize());
+ size = add_size(size, ProcArrayShmemSize());
+ size = add_size(size, BackendStatusShmemSize());
+ size = add_size(size, SInvalShmemSize());
+ size = add_size(size, PMSignalShmemSize());
+ size = add_size(size, ProcSignalShmemSize());
+ size = add_size(size, CheckpointerShmemSize());
+ size = add_size(size, AutoVacuumShmemSize());
+ size = add_size(size, ReplicationSlotsShmemSize());
+ size = add_size(size, ReplicationOriginShmemSize());
+ size = add_size(size, WalSndShmemSize());
+ size = add_size(size, WalRcvShmemSize());
+ size = add_size(size, PgArchShmemSize());
+ size = add_size(size, ApplyLauncherShmemSize());
+ size = add_size(size, SnapMgrShmemSize());
+ size = add_size(size, BTreeShmemSize());
+ size = add_size(size, SyncScanShmemSize());
+ size = add_size(size, AsyncShmemSize());
+#ifdef EXEC_BACKEND
+ size = add_size(size, ShmemBackendArraySize());
+#endif
+
+ /* freeze the addin request size and include it */
+ addin_request_allowed = false;
+ size = add_size(size, total_addin_request);
+
+ /* round up to a multiple of 8192; adds a full 8192 if already aligned */
+ size = add_size(size, 8192 - (size % 8192));
+
+ return size;
+}
/*
* CreateSharedMemoryAndSemaphores
@@ -102,65 +183,8 @@ CreateSharedMemoryAndSemaphores(void)
Size size;
int numSemas;
- /* Compute number of semaphores we'll need */
- numSemas = ProcGlobalSemas();
- numSemas += SpinlockSemas();
-
- /*
- * Size of the Postgres shared-memory block is estimated via
- * moderately-accurate estimates for the big hogs, plus 100K for the
- * stuff that's too small to bother with estimating.
- *
- * We take some care during this phase to ensure that the total size
- * request doesn't overflow size_t. If this gets through, we don't
- * need to be so careful during the actual allocation phase.
- */
- size = 100000;
- size = add_size(size, PGSemaphoreShmemSize(numSemas));
- size = add_size(size, SpinlockSemaSize());
- size = add_size(size, hash_estimate_size(SHMEM_INDEX_SIZE,
- sizeof(ShmemIndexEnt)));
- size = add_size(size, dsm_estimate_size());
- size = add_size(size, BufferShmemSize());
- size = add_size(size, LockShmemSize());
- size = add_size(size, PredicateLockShmemSize());
- size = add_size(size, ProcGlobalShmemSize());
- size = add_size(size, XLOGShmemSize());
- size = add_size(size, CLOGShmemSize());
- size = add_size(size, CommitTsShmemSize());
- size = add_size(size, SUBTRANSShmemSize());
- size = add_size(size, TwoPhaseShmemSize());
- size = add_size(size, BackgroundWorkerShmemSize());
- size = add_size(size, MultiXactShmemSize());
- size = add_size(size, LWLockShmemSize());
- size = add_size(size, ProcArrayShmemSize());
- size = add_size(size, BackendStatusShmemSize());
- size = add_size(size, SInvalShmemSize());
- size = add_size(size, PMSignalShmemSize());
- size = add_size(size, ProcSignalShmemSize());
- size = add_size(size, CheckpointerShmemSize());
- size = add_size(size, AutoVacuumShmemSize());
- size = add_size(size, ReplicationSlotsShmemSize());
- size = add_size(size, ReplicationOriginShmemSize());
- size = add_size(size, WalSndShmemSize());
- size = add_size(size, WalRcvShmemSize());
- size = add_size(size, PgArchShmemSize());
- size = add_size(size, ApplyLauncherShmemSize());
- size = add_size(size, SnapMgrShmemSize());
- size = add_size(size, BTreeShmemSize());
- size = add_size(size, SyncScanShmemSize());
- size = add_size(size, AsyncShmemSize());
-#ifdef EXEC_BACKEND
- size = add_size(size, ShmemBackendArraySize());
-#endif
-
- /* freeze the addin request size and include it */
- addin_request_allowed = false;
- size = add_size(size, total_addin_request);
-
- /* might as well round it off to a multiple of a typical page size */
- size = add_size(size, 8192 - (size % 8192));
-
+ /* Compute the size of the shared-memory block */
+ size = CalculateShmemSize(&numSemas);
elog(DEBUG3, "invoking IpcMemoryCreate(size=%zu)", size);
/*
diff --git a/src/include/storage/ipc.h b/src/include/storage/ipc.h
index 753a6dd4d7..80e191d407 100644
--- a/src/include/storage/ipc.h
+++ b/src/include/storage/ipc.h
@@ -77,6 +77,7 @@ extern void check_on_shmem_exit_lists_are_empty(void);
/* ipci.c */
extern PGDLLIMPORT shmem_startup_hook_type shmem_startup_hook;
+extern Size CalculateShmemSize(int *num_semaphores);
extern void CreateSharedMemoryAndSemaphores(void);
#endif /* IPC_H */
--
2.16.6
[application/octet-stream] v3-0002-Introduce-shared_memory_size-and-huge_pages_requi.patch (12.0K, 3-v3-0002-Introduce-shared_memory_size-and-huge_pages_requi.patch)
download | inline diff:
From c7e8d7937a940ecc5d22c4cc757d452bb536ac3e Mon Sep 17 00:00:00 2001
From: Nathan Bossart <[email protected]>
Date: Tue, 31 Aug 2021 05:05:43 +0000
Subject: [PATCH v3 2/2] Introduce shared_memory_size and huge_pages_required
GUCs.
These parameters are intended to simplify huge pages setup.
Instead of manually calculating the number of huge pages required
for the main shared memory segment, a command like the following
can be used to determine how many are needed:
postgres -D $PGDATA -C huge_pages_required
---
doc/src/sgml/config.sgml | 30 ++++++++++++++++++++
doc/src/sgml/runtime.sgml | 33 +++++++---------------
src/backend/port/sysv_shmem.c | 2 +-
src/backend/postmaster/postmaster.c | 55 ++++++++++++++++++++++---------------
src/backend/storage/ipc/ipci.c | 48 ++++++++++++++++++++++++++++++++
src/backend/utils/misc/guc.c | 25 +++++++++++++++++
src/include/storage/ipc.h | 1 +
src/include/storage/pg_shmem.h | 4 +++
8 files changed, 152 insertions(+), 46 deletions(-)
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 2c31c35a6b..e586427640 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -10101,6 +10101,22 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
</listitem>
</varlistentry>
+ <varlistentry id="guc-huge-pages-required" xreflabel="huge_pages_required">
+ <term><varname>huge_pages_required</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>huge_pages_required</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Reports the number of huge pages that are required for the main
+ shared memory segment based on the specified
+ <xref linkend="guc-huge-page-size"/>. If the huge page size cannot
+ be determined, this will be <literal>-1</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-integer-datetimes" xreflabel="integer_datetimes">
<term><varname>integer_datetimes</varname> (<type>boolean</type>)
<indexterm>
@@ -10275,6 +10291,20 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
</listitem>
</varlistentry>
+ <varlistentry id="guc-shared-memory-size" xreflabel="shared_memory_size">
+ <term><varname>shared_memory_size</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>shared_memory_size</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Reports the size of the main shared memory segment, rounded up to
+ the nearest megabyte.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-ssl-library" xreflabel="ssl_library">
<term><varname>ssl_library</varname> (<type>string</type>)
<indexterm>
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml
index f1cbc1d9e9..28bc36283e 100644
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -1442,32 +1442,19 @@ export PG_OOM_ADJUST_VALUE=0
with <varname>CONFIG_HUGETLBFS=y</varname> and
<varname>CONFIG_HUGETLB_PAGE=y</varname>. You will also have to configure
the operating system to provide enough huge pages of the desired size.
- To estimate the number of huge pages needed, start
- <productname>PostgreSQL</productname> without huge pages enabled and check
- the postmaster's anonymous shared memory segment size, as well as the
- system's default and supported huge page sizes, using the
- <filename>/proc</filename> and <filename>/sys</filename> file systems.
- This might look like:
+ To estimate the number of huge pages needed, use the
+ <command>postgres</command> command to see the value of
+ <xref linkend="guc-huge-pages-required"/>. This might look like:
<programlisting>
-$ <userinput>head -1 $PGDATA/postmaster.pid</userinput>
-4170
-$ <userinput>pmap 4170 | awk '/rw-s/ && /zero/ {print $2}'</userinput>
-6490428K
-$ <userinput>grep ^Hugepagesize /proc/meminfo</userinput>
-Hugepagesize: 2048 kB
-$ <userinput>ls /sys/kernel/mm/hugepages</userinput>
-hugepages-1048576kB hugepages-2048kB
+$ <userinput>postgres -D $PGDATA -C huge_pages_required</userinput>
+3170
</programlisting>
- In this example the default is 2MB, but you can also explicitly request
- either 2MB or 1GB with <xref linkend="guc-huge-page-size"/>.
-
- Assuming <literal>2MB</literal> huge pages,
- <literal>6490428</literal> / <literal>2048</literal> gives approximately
- <literal>3169.154</literal>, so in this example we need at
- least <literal>3170</literal> huge pages. A larger setting would be
- appropriate if other programs on the machine also need huge pages.
- We can set this with:
+ Note that you can explicitly request either 2MB or 1GB huge pages with
+ <xref linkend="guc-huge-page-size"/>. While we need at least
+ <literal>3170</literal> huge pages in this example, a larger setting
+ would be appropriate if other programs on the machine also need huge
+ pages. We can allocate the huge pages with:
<programlisting>
# <userinput>sysctl -w vm.nr_hugepages=3170</userinput>
</programlisting>
diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c
index 9de96edf6a..f42f1ac171 100644
--- a/src/backend/port/sysv_shmem.c
+++ b/src/backend/port/sysv_shmem.c
@@ -478,7 +478,7 @@ PGSharedMemoryAttach(IpcMemoryId shmId,
* Returns the (real, assumed or config provided) page size into *hugepagesize,
* and the hugepage-related mmap flags to use into *mmap_flags.
*/
-static void
+void
GetHugePageSize(Size *hugepagesize, int *mmap_flags)
{
Size default_hugepagesize = 0;
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 9c2c98614a..c32c21d632 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -893,6 +893,39 @@ PostmasterMain(int argc, char *argv[])
if (!SelectConfigFiles(userDoption, progname))
ExitPostmaster(2);
+ /* Verify that DataDir looks reasonable */
+ checkDataDir();
+
+ /* Check that pg_control exists */
+ checkControlFile();
+
+ /* And switch working directory into it */
+ ChangeToDataDir();
+
+ /*
+ * Register the apply launcher. Since it registers a background worker,
+ * it needs to be called before InitializeMaxBackends(), and it's probably
+ * a good idea to call it before any modules had chance to take the
+ * background worker slots.
+ */
+ ApplyLauncherRegister();
+
+ /*
+ * Process any libraries that should be preloaded at postmaster start.
+ * This happens before running -C, so that it is possible to get an
+ * estimation of the total shared memory size allocated to this system,
+ * accounting for the portion from loaded libraries.
+ */
+ process_shared_preload_libraries();
+
+ /*
+ * Determine the value of any runtime-computed GUCs that depend on the
+ * amount of shared memory required. It is important to do this after
+ * preloaded libraries have had a chance to request additional shared
+ * memory.
+ */
+ InitializeShmemGUCs();
+
if (output_config_variable != NULL)
{
/*
@@ -907,15 +940,6 @@ PostmasterMain(int argc, char *argv[])
ExitPostmaster(0);
}
- /* Verify that DataDir looks reasonable */
- checkDataDir();
-
- /* Check that pg_control exists */
- checkControlFile();
-
- /* And switch working directory into it */
- ChangeToDataDir();
-
/*
* Check for invalid combinations of GUC settings.
*/
@@ -996,19 +1020,6 @@ PostmasterMain(int argc, char *argv[])
*/
LocalProcessControlFile(false);
- /*
- * Register the apply launcher. Since it registers a background worker,
- * it needs to be called before InitializeMaxBackends(), and it's probably
- * a good idea to call it before any modules had chance to take the
- * background worker slots.
- */
- ApplyLauncherRegister();
-
- /*
- * process any libraries that should be preloaded at postmaster start
- */
- process_shared_preload_libraries();
-
/*
* Initialize SSL library, if specified.
*/
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index b225b1ee70..86061da1bc 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -14,6 +14,10 @@
*/
#include "postgres.h"
+#ifndef WIN32
+#include <sys/mman.h>
+#endif
+
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/heapam.h"
@@ -313,3 +317,47 @@ CreateSharedMemoryAndSemaphores(void)
if (shmem_startup_hook)
shmem_startup_hook();
}
+
+/*
+ * InitializeShmemGUCs
+ *
+ * Sets the runtime-computed GUCs shared_memory_size (in MB, rounded up) and,
+ * where a huge page size is known, huge_pages_required.  Calls
+ * CalculateShmemSize(), which freezes add-in shared memory requests.
+ */
+void
+InitializeShmemGUCs(void)
+{
+	char		buf[64];
+	Size		size_b;
+	Size		size_mb;
+	Size		hp_size = 0;	/* 0 means "huge page size unknown" */
+#ifdef MAP_HUGETLB
+	int			unused;
+#endif
+
+	/* Total size in bytes, and in megabytes rounded up to the next MB. */
+	size_b = CalculateShmemSize(NULL);
+	size_mb = add_size(size_b, (1024 * 1024) - 1) / (1024 * 1024);
+
+	/* Size is size_t, so use %zu; %lu is wrong on LLP64 (64-bit Windows). */
+	snprintf(buf, sizeof(buf), "%zu MB", size_mb);
+	SetConfigOption("shared_memory_size", buf, PGC_INTERNAL, PGC_S_OVERRIDE);
+
+	/* Determine the huge page size, if the platform has one. */
+#if defined(MAP_HUGETLB)
+	GetHugePageSize(&hp_size, &unused);
+#elif defined(WIN32)
+	hp_size = GetLargePageMinimum();	/* 0 if large pages are unsupported */
+#endif
+
+	/* Leave the boot value (-1) in place rather than dividing by zero. */
+	if (hp_size != 0)
+	{
+		/* Round up without adding a spare page for exact multiples. */
+		Size		hp_required = size_b / hp_size + (size_b % hp_size != 0);
+
+		snprintf(buf, sizeof(buf), "%zu", hp_required);
+		SetConfigOption("huge_pages_required", buf, PGC_INTERNAL, PGC_S_OVERRIDE);
+	}
+}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 467b0fd6fe..0d4dd27394 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -620,6 +620,9 @@ char *pgstat_temp_directory;
char *application_name;
+int shmem_size_mb;
+int huge_pages_required;
+
int tcp_keepalives_idle;
int tcp_keepalives_interval;
int tcp_keepalives_count;
@@ -2223,6 +2226,17 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"huge_pages_required", PGC_INTERNAL, RESOURCES_MEM,
+ gettext_noop("Shows the number of huge pages needed for the main shared memory area."),
+ gettext_noop("-1 indicates that the huge page size could not be determined."),
+ GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE
+ },
+ &huge_pages_required,
+ -1, -1, INT_MAX,
+ NULL, NULL, NULL
+ },
+
{
/* This is PGC_SUSET to prevent hiding from log_lock_waits. */
{"deadlock_timeout", PGC_SUSET, LOCK_MANAGEMENT,
@@ -2337,6 +2351,17 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"shared_memory_size", PGC_INTERNAL, RESOURCES_MEM,
+ gettext_noop("Shows the amount of shared memory allocated by the server (rounded up to the nearest MB)."),
+ NULL,
+ GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE | GUC_UNIT_MB
+ },
+ &shmem_size_mb,
+ 0, 0, INT_MAX,
+ NULL, NULL, NULL
+ },
+
{
{"temp_buffers", PGC_USERSET, RESOURCES_MEM,
gettext_noop("Sets the maximum number of temporary buffers used by each session."),
diff --git a/src/include/storage/ipc.h b/src/include/storage/ipc.h
index 80e191d407..7a1ebc8559 100644
--- a/src/include/storage/ipc.h
+++ b/src/include/storage/ipc.h
@@ -79,5 +79,6 @@ extern PGDLLIMPORT shmem_startup_hook_type shmem_startup_hook;
extern Size CalculateShmemSize(int *num_semaphores);
extern void CreateSharedMemoryAndSemaphores(void);
+extern void InitializeShmemGUCs(void);
#endif /* IPC_H */
diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h
index 059df1b72c..c44403ed6a 100644
--- a/src/include/storage/pg_shmem.h
+++ b/src/include/storage/pg_shmem.h
@@ -88,4 +88,8 @@ extern PGShmemHeader *PGSharedMemoryCreate(Size size,
extern bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2);
extern void PGSharedMemoryDetach(void);
+#ifdef MAP_HUGETLB
+extern void GetHugePageSize(Size *hugepagesize, int *mmap_flags);
+#endif
+
#endif /* PG_SHMEM_H */
--
2.16.6
view thread (108+ messages) latest in thread
reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Reply to all the recipients using the --to and --cc options:
reply via email
To: [email protected]
Cc: [email protected], [email protected], [email protected], [email protected], [email protected], [email protected], [email protected], [email protected]
Subject: Re: Estimating HugePages Requirements?
In-Reply-To: <[email protected]>
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox