From 26854803952ca19d9eafa877e452eda59799a223 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Mon, 16 Mar 2026 15:43:38 +1300 Subject: [PATCH v4 07/21] pg_stack_alloc: Use alloca() for allocation. Add support for alloca(), with defenses against stack overflow. The existing per-function limit applies, along with a secondary limit that stops allocating on the stack once the total stack depth has reached 50% of the size that would cause check_stack_depth() to ereport(). The default per-function limit is increased to 1024 bytes when using alloca(). This is available only with GCC, Clang or MSVC on systems where the stack grows down towards zero, but that covers all the systems we support. The array-based implementation is still there but won't be used by default. Reviewed-by: Discussion: --- configure | 38 +++++ configure.ac | 1 + meson.build | 1 + src/include/pg_config.h.in | 3 + src/include/utils/pg_stack_alloc.h | 236 ++++++++++++++++++++++++++++- src/test/regress/regress.c | 78 ++++++++++ 6 files changed, 352 insertions(+), 5 deletions(-) diff --git a/configure b/configure index a5ee71cf728..efe6249b283 100755 --- a/configure +++ b/configure @@ -16233,6 +16233,44 @@ cat >>confdefs.h <<_ACEOF #define HAVE__BUILTIN_STACK_ADDRESS 1 _ACEOF +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_alloca" >&5 +$as_echo_n "checking for __builtin_alloca... " >&6; } +if ${pgac_cv__builtin_alloca+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +void * +call__builtin_alloca(void) +{ + return __builtin_alloca(0); +} +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + pgac_cv__builtin_alloca=yes +else + pgac_cv__builtin_alloca=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_alloca" >&5 +$as_echo "$pgac_cv__builtin_alloca" >&6; } +if test x"${pgac_cv__builtin_alloca}" = xyes ; then + +cat >>confdefs.h <<_ACEOF +#define HAVE__BUILTIN_ALLOCA 1 +_ACEOF + fi # We require 64-bit fseeko() to be available, but run this check anyway diff --git a/configure.ac b/configure.ac index b13cfa1813f..50e874a3b70 100644 --- a/configure.ac +++ b/configure.ac @@ -1881,6 +1881,7 @@ PGAC_CHECK_BUILTIN_FUNC([__builtin_ctz], [unsigned int x]) # so it needs a different test function. PGAC_CHECK_BUILTIN_FUNC_PTR([__builtin_frame_address], [0]) PGAC_CHECK_BUILTIN_FUNC_PTR([__builtin_stack_address], []) +PGAC_CHECK_BUILTIN_FUNC_PTR([__builtin_alloca], [0]) # We require 64-bit fseeko() to be available, but run this check anyway # in case it finds that _LARGEFILE_SOURCE has to be #define'd for that. diff --git a/meson.build b/meson.build index db1f38dff93..25272c43a36 100644 --- a/meson.build +++ b/meson.build @@ -2042,6 +2042,7 @@ endif # Check if various builtins exist. Some builtins are tested separately, # because we want to test something more complicated than the generic case. builtins = [ + 'alloca', 'bswap16', 'bswap32', 'bswap64', diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index f956012be3d..66df0e833c8 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -514,6 +514,9 @@ /* Define to 1 if you have XSAVE intrinsics. */ #undef HAVE_XSAVE_INTRINSICS +/* Define to 1 if your compiler understands __builtin_alloca. */ +#undef HAVE__BUILTIN_ALLOCA + /* Define to 1 if your compiler understands __builtin_bswap16. 
*/ #undef HAVE__BUILTIN_BSWAP16 diff --git a/src/include/utils/pg_stack_alloc.h b/src/include/utils/pg_stack_alloc.h index cd3c3838013..a82ed7cdf0e 100644 --- a/src/include/utils/pg_stack_alloc.h +++ b/src/include/utils/pg_stack_alloc.h @@ -3,16 +3,20 @@ * pg_stack_alloc.h * Allocator for objects that don't escape the current function. * - * A palloc()-like interface for allocating memory on the stack. The initial - * implementation uses an array declared statically. + * A palloc()-like interface to alloca(), for allocating memory on the stack. + * Raw alloca() is usually considered dangerous because of its inherent stack + * overflow risk, but this interface imposes limits on stack size and falls + * back to regular palloc() when they would be exceeded. + * + * GCC, Clang and MSVC are supported, as long as PG_STACK_DIRECTION is + * downwards (all modern systems). A simple array-based emulation + * is provided for systems that can't use that. * * Once stack space is exhausted, allocations silently fall back to using * palloc(). Memory should therefore still be freed explicitly with * pg_stack_free() or MemoryContext-level cleanup. It is a no-op in the * common case that pfree() doesn't need to be called. * - * XXX A space-limited version of alloca() could be added. - * * XXX It might be possible to use something like "defer" or equivalent * compiler extensions to clean up palloc()'d memory automatically, in future * work, and then pg_stack_free() would not be necessary. @@ -28,6 +32,7 @@ #define PG_STACK_ALLOC_H #include "utils/elog.h" +#include "utils/memutils.h" /* for MaxAllocSize */ #include "utils/palloc.h" #include "miscadmin.h" @@ -36,13 +41,35 @@ /* #define PG_STACK_USE_PALLOC_LOG "/tmp/pg_stack_alloc.csv" */ +/* #define PG_STACK_USE_ARRAY */ + +/* Spelling and alignment of alloca() on this system. 
*/ +#ifdef HAVE__BUILTIN_ALLOCA +/* + * Using the builtin avoids the need to figure out which header to include on + * each platform, and ensures we get GCC/Clang's documented behavior and not + * some other alloca() implementation technique with unknown characteristics. + */ +#define pg_stack_alloca(size) __builtin_alloca(size) +#define ALIGNOF_ALLOCA __BIGGEST_ALIGNMENT__ +#elif defined(_MSC_VER) +#include <malloc.h> +#define pg_stack_alloca(size) alloca(size) +/* https://learn.microsoft.com/en-us/cpp/build/stack-usage?view=msvc-170 */ +#define ALIGNOF_ALLOCA 16 +#endif /* Choose which implementation to use, if not already defined manually. */ #if !defined(PG_STACK_USE_ARRAY) && \ + !defined(PG_STACK_USE_ALLOCA) && \ !defined(PG_STACK_USE_PALLOC) && \ !defined(PG_STACK_USE_PALLOC_LOG) +#if PG_STACK_DIRECTION < 0 && defined(pg_stack_alloca) +#define PG_STACK_USE_ALLOCA +#else #define PG_STACK_USE_ARRAY #endif +#endif /*------------------------------------------------------------------------- @@ -56,8 +83,13 @@ #define PG_STACK_MAX_ALIGN 4096 /* Declare a stack allocator with a default size limit. */ +#ifdef PG_STACK_USE_ARRAY #define DECLARE_PG_STACK() \ DECLARE_PG_STACK_SIZE(128) +#else +#define DECLARE_PG_STACK() \ + DECLARE_PG_STACK_SIZE(1024) +#endif /* * As above, but with a caller-supplied limit on stack usage. The default @@ -155,7 +187,13 @@ */ #define pg_stack_sanity_checks(align) \ (AssertMacro((align) > 0 && (align) <= PG_STACK_MAX_ALIGN), \ - AssertMacro(((align) & ((align) - 1)) == 0 /* power-of-two? */)) + AssertMacro(((align) & ((align) - 1)) == 0 /* power-of-two? */), \ + StaticAssertExpr(!pg_in_lexical_scope_p(PG_TRY), \ + "pg_stack API not allowed in PG_TRY"), \ + StaticAssertExpr(!pg_in_lexical_scope_p(PG_CATCH), \ + "pg_stack API not allowed in PG_CATCH"), \ + StaticAssertExpr(!pg_in_lexical_scope_p(PG_FINALLY), \ + "pg_stack API not allowed in PG_FINALLY")) /* For assertions. 
*/ static inline bool @@ -358,4 +396,192 @@ pg_stack_alloc_aligned_from_array(char *array, #endif + +/*------------------------------------------------------------------------- + * + * alloca()-based implementation. + * + *------------------------------------------------------------------------- + */ +#ifdef PG_STACK_USE_ALLOCA + +/* Required interface macros. */ + +#define DECLARE_PG_STACK_IMPL(size) \ + pg_stack_impl_decl \ + const char *pg_stack_limit = \ + pg_stack_compute_limit(pg_stack_lower_bound(), (size)) + +#define pg_stack_alloc_aligned_impl(size, align) \ + (likely(pg_stack_alloca_would_fit_p(pg_stack_lower_bound(), \ + pg_stack_limit, \ + (size), (align))) ? \ + pg_stack_alloca_aligned((size), (align)) : \ + pg_stack_palloc_aligned((size), (align))) + +#define pg_stack_ptr_p(ptr) \ + ((char *) (ptr) >= pg_stack_lower_bound() && \ + (char *) (ptr) <= pg_stack_upper_bound()) + + +/* Compiler-specific ways to inspect the stack's bounds. */ + +/* The stack pointer. */ +#if defined(HAVE__BUILTIN_STACK_ADDRESS) && \ + (!defined(__clang__) || __clang_major__ >= 22) +/* Prefer builtin if available. */ +#define pg_stack_lower_bound() ((const char *) __builtin_stack_address()) +#else +/* + * Calling alloca(0) effectively reads the stack pointer on GCC/Clang/MSVC, + * but not reliably enough for pg_stack_lower_bound()'s purposes due to + * optimizations. It seems good enough for the initial value that determines + * pg_stack_limit though, and better than taking a local variable's address, + * which would be less accurate and hard to get past static analyzers. + * Instead we'll maintain a reliable lower bound in pg_stack_lower_bound_var. + * + * XXX I've only actually seen it break on Apple Clang, where if you're + * unlucky you can see a stack pointer ~16 bytes higher than a recent alloca() + * result, and then pg_stack_ptr_p() is confused and we send it to pfree(). 
+ */ +#define pg_stack_lower_bound_init() ((const char *) pg_stack_alloca(0)) +#endif + +/* Upper bound of stack addresses, inclusive. */ +#ifdef HAVE__BUILTIN_FRAME_ADDRESS +#define pg_stack_upper_bound() ((const char *) __builtin_frame_address(0)) +#endif + + +/* Replacement implementations of pg_stack_{upper,lower}_bound(). */ + +/* The base address from stack_depth.c will do if we don't have a builtin. */ +#ifndef pg_stack_upper_bound +#define pg_stack_upper_bound() ((const char *) stack_base_ptr) +#endif + +#ifdef pg_stack_lower_bound +/* Assert that all alloca() results are bounded by the builtin. */ +#define pg_stack_impl_decl \ + char *pg_stack_let_ptr; +#define pg_stack_track(ptr) \ + (pg_stack_let_ptr = (ptr), /* needed for sequencing */ \ + AssertMacro(pg_stack_lower_bound() <= pg_stack_let_ptr), \ + pg_stack_let_ptr) +#else +/* Remember the lowest address ever returned by alloca(). */ +#define pg_stack_impl_decl \ + const char *pg_stack_lower_bound_var = pg_stack_lower_bound_init(); +#define pg_stack_lower_bound() pg_stack_lower_bound_var +#define pg_stack_track(ptr) \ + pg_stack_set_lower_bound(&pg_stack_lower_bound_var, (ptr)) +static inline void * +pg_stack_set_lower_bound(const char **lower_bound, void *ptr) +{ + /* + * Don't assume that alloca()'s result is lower each time. That is + * usually true, but not always. + */ + if ((const char *) ptr < *lower_bound) + *lower_bound = (const char *) ptr; + return ptr; +} +#endif + + +/* Implementation code. */ + +/* Choose a limit address. */ +static inline const char * +pg_stack_compute_limit(const char *sp, size_t size) +{ + const char *limit = sp - size; + + /* stack_depth.c's soft limit overrides the requested size if closer. */ + if ((const char *) stack_soft_limit_ptr > limit) + limit = (const char *) stack_soft_limit_ptr; + + return limit; +} + +/* + * Call alloca(), adjusting for align > ALIGNOF_ALLOCA if necessary, and + * tracking the lower bound if necessary. 
+ */ +#define pg_stack_alloca_aligned(size, align) \ + pg_stack_realign( \ + pg_stack_track(pg_stack_alloca(pg_stack_pad((size), (align)))), \ + (align)) + +/* Reserve padding space for pg_stack_realign() if stricter than default. */ +static inline size_t +pg_stack_pad(size_t size, size_t align) +{ + if (align <= ALIGNOF_ALLOCA) + return size; + + return (align - ALIGNOF_ALLOCA) + TYPEALIGN(ALIGNOF_ALLOCA, size); +} + +/* Realign alloca()'s result if stricter than default. */ +static inline void * +pg_stack_realign(void *ptr, size_t align) +{ + Assert(pg_stack_ptr_is_aligned_p(ptr, ALIGNOF_ALLOCA)); + + if (align <= ALIGNOF_ALLOCA) + return ptr; + + return (void *) TYPEALIGN(align, ptr); +} + +/* Would we overflow pg_stack_estimate_alloca()'s arithmetic? */ +static inline bool +pg_stack_alloca_would_overflow_p(const char *lower, size_t size, size_t align) +{ + if (align <= ALIGNOF_ALLOCA) + { + /* + * pg_stack_pad() doesn't bother to align its result to ALIGNOF_ALLOCA + * unless requested alignment is stricter and the padding could affect + * the result of x < pg_stack_limit. The latter is usually + * ALIGNOF_ALLOCA-aligned itself, so it'd be a waste of cycles here + * and in pg_stack_estimate_alloca(). + */ + Assert(pg_stack_pad(size, align) == size); + return size > (uintptr_t) lower; + } + + /* Don't let pg_stack_pad() overflow. */ + if (size > MaxAllocSize) + return true; + + /* Don't let pg_stack_estimate_alloca() underflow. */ + return pg_stack_pad(size, align) > (uintptr_t) lower; +} + +/* + * Estimate result of a proposed alloca(), which would become the new + * pg_stack_lower_bound(). The only permitted use of this pointer is to check + * if it'd be below pg_stack_limit. + */ +static inline const char * +pg_stack_estimate_alloca(const char *lower, size_t size, size_t align) +{ + Assert(!pg_stack_alloca_would_overflow_p(lower, size, align)); + + return lower - pg_stack_pad(size, align); +} + +/* Would a proposed alloca() call exceed our limit? 
*/ +static inline bool +pg_stack_alloca_would_fit_p(const char *lower, const char *limit, + size_t size, size_t align) +{ + return !pg_stack_alloca_would_overflow_p(lower, size, align) && + pg_stack_estimate_alloca(lower, size, align) >= limit; +} + +#endif + #endif diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c index f82a0a12e8f..396c58a3394 100644 --- a/src/test/regress/regress.c +++ b/src/test/regress/regress.c @@ -1399,6 +1399,9 @@ test_pg_stack_alloc(PG_FUNCTION_ARGS) DECLARE_PG_STACK_SIZE(1024 * 8); /* Too big for the stack. */ +#ifdef PG_STACK_USE_ALLOCA + Assert(!pg_stack_alloca_would_fit_p(pg_stack_lower_bound(), pg_stack_limit, 10000, 8)); +#endif p = pg_stack_alloc(10000); Assert(!pg_stack_ptr_p(p)); pg_stack_free(p); @@ -1445,10 +1448,33 @@ test_pg_stack_alloc(PG_FUNCTION_ARGS) PG_END_TRY(); Assert(raised_error); +#ifdef PG_STACK_USE_ALLOCA + /* Overflow defenses in limit computations. */ + Assert(pg_stack_alloca_would_overflow_p(pg_stack_lower_bound(), (size_t) -1, 1024)); + Assert(pg_stack_alloca_would_overflow_p(pg_stack_lower_bound(), (size_t) -1, 8)); + Assert(!pg_stack_alloca_would_overflow_p(pg_stack_lower_bound(), + MaxAllocSize, + 1024)); + Assert(pg_stack_alloca_would_overflow_p(pg_stack_lower_bound(), + MaxAllocSize + 1, + 1024)); + Assert(pg_stack_alloca_would_overflow_p(pg_stack_lower_bound(), + (size_t) pg_stack_lower_bound() + 1, + 8)); +#endif + /* Test a range of alignments. */ +#ifdef PG_STACK_USE_ALLOCA +#define TEST_ALIGN ALIGNOF_ALLOCA +#else #define TEST_ALIGN MAXIMUM_ALIGNOF +#endif for (int i = 1; i <= TEST_ALIGN * 8; i *= 2) { +#ifdef PG_STACK_USE_ALLOCA + const char *estimate PG_USED_FOR_ASSERTS_ONLY; +#endif + /* Allocate and check alignment is as requested, when we use palloc(). 
*/ p = pg_stack_alloc_aligned(1024 * 8, i); @@ -1456,11 +1482,63 @@ test_pg_stack_alloc(PG_FUNCTION_ARGS) Assert(pg_stack_ptr_is_aligned_p(p, i)); pg_stack_free(p); +#ifdef PG_STACK_USE_ALLOCA + /* Lower bound should be ALIGNOF_ALLOCA-aligned at all times. */ + Assert(pg_stack_ptr_is_aligned_p(pg_stack_lower_bound(), + ALIGNOF_ALLOCA)); + + /* Estimate what alloca() will return. */ + estimate = pg_stack_estimate_alloca(pg_stack_lower_bound(), i, i); + + /* Basic sanity checks on the estimates. */ + if (i > ALIGNOF_ALLOCA) + { + size_t size PG_USED_FOR_ASSERTS_ONLY = i; + size_t align PG_USED_FOR_ASSERTS_ONLY = i; + size_t padding PG_USED_FOR_ASSERTS_ONLY = align - ALIGNOF_ALLOCA; + + Assert(pg_stack_ptr_is_aligned_p(estimate, ALIGNOF_ALLOCA)); + Assert(estimate == pg_stack_lower_bound() - size - padding); + } + else + { + size_t size PG_USED_FOR_ASSERTS_ONLY = i; + + Assert(estimate == pg_stack_lower_bound() - size); + } +#endif + /* Allocate and check alignment is as requested. */ p = pg_stack_alloc_aligned(i, i); Assert(pg_stack_ptr_p(p)); Assert(pg_stack_ptr_is_aligned_p(p, i)); + +#ifdef PG_STACK_COMPARE_ALLOCA_ESTIMATE + + /* + * Check alloca()'s observed behavior against our estimate. + * + * Estimates aren't expected to match perfectly, and this would fail + * on CI because -fsanitize=address changes the results. There may be + * any number of minor details we get wrong on some system or other. + * It passes in regular builds, and is useful for investigating + * implementation details. + */ + if (i > ALIGNOF_ALLOCA) + { + /* Estimate is for the lower bound, and p is realigned. */ + Assert(estimate == pg_stack_lower_bound()); + Assert(p == (const char *) TYPEALIGN(i, estimate)); + } + else + { + /* We don't bother to align default-alignment estimates. 
*/ + estimate = (const char *) TYPEALIGN_DOWN(ALIGNOF_ALLOCA, estimate); + Assert(p == (const char *) TYPEALIGN_DOWN(ALIGNOF_ALLOCA, estimate)); + Assert(p == pg_stack_lower_bound()); + } +#endif } PG_RETURN_VOID(); -- 2.50.1 (Apple Git-155)