From b8770f7a523ca8519fcbfbf4f80e76de22dd6ee4 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Thu, 5 Mar 2026 21:38:46 +1300 Subject: [PATCH v4 03/21] Use pg_stack_alloc() in locale code. Use stack buffers for temporary C strings, wchar_t strings and ICU UChar strings with the new API. Some cases were already using open-coded arrays, while others nearby were not but are obvious candidates, so let's change them too. These cases used TEXTBUFLEN (1KB), and they inherit the same limit by requesting a non-default size: DECLARE_PG_STACK_SIZE(LOCALE_STACK_SIZE); Common operations that were open-coded as pointer arithmetic and memcpy() are changed to pg_stack_strdup_with_len(...), and operations that worked with wider characters gain a small amount of type-safety. Reviewed-by: Discussion: --- src/backend/utils/adt/pg_locale.c | 6 - src/backend/utils/adt/pg_locale_icu.c | 59 +++------- src/backend/utils/adt/pg_locale_libc.c | 150 +++++++++---------------- src/include/utils/pg_locale.h | 6 + 4 files changed, 77 insertions(+), 144 deletions(-) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 6c5c1019e1e..3a6190892a7 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -60,12 +60,6 @@ #define PGLOCALE_SUPPORT_ERROR(provider) \ elog(ERROR, "unsupported collprovider for %s: %c", __func__, provider) -/* - * This should be large enough that most strings will fit, but small enough - * that we feel comfortable putting it on the stack - */ -#define TEXTBUFLEN 1024 - #define MAX_L10N_DATA 80 /* pg_locale_builtin.c */ diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c index 352b4c3885f..3849ac04a8c 100644 --- a/src/backend/utils/adt/pg_locale_icu.c +++ b/src/backend/utils/adt/pg_locale_icu.c @@ -39,16 +39,9 @@ #include "utils/formatting.h" #include "utils/memutils.h" #include "utils/pg_locale.h" +#include "utils/pg_stack_alloc.h" #include "utils/syscache.h" -/* - * 
Size of stack buffer to use for string transformations, used to avoid heap - * allocations in typical cases. This should be large enough that most strings - * will fit, but small enough that we feel comfortable putting it on the - * stack. - */ -#define TEXTBUFLEN 1024 - extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context); #ifdef USE_ICU @@ -755,23 +748,17 @@ size_t strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { - char sbuf[TEXTBUFLEN]; - char *buf = sbuf; UChar *uchar; int32_t ulen; - size_t uchar_bsize; Size result_bsize; + DECLARE_PG_STACK_SIZE(LOCALE_STACK_SIZE); + init_icu_converter(); ulen = uchar_length(icu_converter, src, srclen); - uchar_bsize = (ulen + 1) * sizeof(UChar); - - if (uchar_bsize > TEXTBUFLEN) - buf = palloc(uchar_bsize); - - uchar = (UChar *) buf; + uchar = pg_stack_alloc_array(UChar, ulen + 1); ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen); @@ -786,8 +773,7 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, Assert(result_bsize > 0); result_bsize--; - if (buf != sbuf) - pfree(buf); + pg_stack_free(uchar); /* if dest is defined, it should be nul-terminated */ Assert(result_bsize >= destsize || dest[result_bsize] == '\0'); @@ -1020,16 +1006,14 @@ static int strncoll_icu(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale) { - char sbuf[TEXTBUFLEN]; - char *buf = sbuf; int32_t ulen1; int32_t ulen2; - size_t bufsize1; - size_t bufsize2; UChar *uchar1, *uchar2; int result; + DECLARE_PG_STACK_SIZE(LOCALE_STACK_SIZE); + /* if encoding is UTF8, use more efficient strncoll_icu_utf8 */ #ifdef HAVE_UCOL_STRCOLLUTF8 Assert(GetDatabaseEncoding() != PG_UTF8); @@ -1040,14 +1024,8 @@ strncoll_icu(const char *arg1, ssize_t len1, ulen1 = uchar_length(icu_converter, arg1, len1); ulen2 = uchar_length(icu_converter, arg2, len2); - bufsize1 = (ulen1 + 1) * sizeof(UChar); - bufsize2 = (ulen2 + 1) * sizeof(UChar); - - 
if (bufsize1 + bufsize2 > TEXTBUFLEN) - buf = palloc(bufsize1 + bufsize2); - - uchar1 = (UChar *) buf; - uchar2 = (UChar *) (buf + bufsize1); + uchar1 = pg_stack_alloc_array(UChar, ulen1 + 1); + uchar2 = pg_stack_alloc_array(UChar, ulen2 + 1); ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1); ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2); @@ -1056,8 +1034,8 @@ strncoll_icu(const char *arg1, ssize_t len1, uchar1, ulen1, uchar2, ulen2); - if (buf != sbuf) - pfree(buf); + pg_stack_free(uchar1); + pg_stack_free(uchar2); return result; } @@ -1068,16 +1046,15 @@ strnxfrm_prefix_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { - char sbuf[TEXTBUFLEN]; - char *buf = sbuf; UCharIterator iter; uint32_t state[2]; UErrorCode status; int32_t ulen = -1; UChar *uchar = NULL; - size_t uchar_bsize; Size result_bsize; + DECLARE_PG_STACK_SIZE(LOCALE_STACK_SIZE); + /* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */ Assert(GetDatabaseEncoding() != PG_UTF8); @@ -1085,12 +1062,7 @@ strnxfrm_prefix_icu(char *dest, size_t destsize, ulen = uchar_length(icu_converter, src, srclen); - uchar_bsize = (ulen + 1) * sizeof(UChar); - - if (uchar_bsize > TEXTBUFLEN) - buf = palloc(uchar_bsize); - - uchar = (UChar *) buf; + uchar = pg_stack_alloc_array(UChar, ulen + 1); ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen); @@ -1108,8 +1080,7 @@ strnxfrm_prefix_icu(char *dest, size_t destsize, (errmsg("sort key generation failed: %s", u_errorName(status)))); - if (buf != sbuf) - pfree(buf); + pg_stack_free(uchar); return result_bsize; } diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c index 78f6ea161a0..874eb161c25 100644 --- a/src/backend/utils/adt/pg_locale_libc.c +++ b/src/backend/utils/adt/pg_locale_libc.c @@ -23,6 +23,7 @@ #include "utils/formatting.h" #include "utils/memutils.h" #include "utils/pg_locale.h" +#include "utils/pg_stack_alloc.h" 
#include "utils/syscache.h" #ifdef __GLIBC__ @@ -72,14 +73,6 @@ * NB: the coding here assumes pg_wchar is an unsigned type. */ -/* - * Size of stack buffer to use for string transformations, used to avoid heap - * allocations in typical cases. This should be large enough that most strings - * will fit, but small enough that we feel comfortable putting it on the - * stack. - */ -#define TEXTBUFLEN 1024 - extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context); static int strncoll_libc(const char *arg1, ssize_t len1, @@ -502,6 +495,8 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t curr_char; size_t max_size; + DECLARE_PG_STACK_SIZE(LOCALE_STACK_SIZE); + if (srclen < 0) srclen = strlen(src); @@ -512,7 +507,7 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, errmsg("out of memory"))); /* Output workspace cannot have more codes than input bytes */ - workspace = palloc_array(wchar_t, srclen + 1); + workspace = pg_stack_alloc_array(wchar_t, srclen + 1); char2wchar(workspace, srclen + 1, src, srclen, loc); @@ -523,7 +518,7 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, * Make result large enough; case change might change number of bytes */ max_size = curr_char * pg_database_encoding_max_length(); - result = palloc(max_size + 1); + result = pg_stack_alloc(max_size + 1); result_size = wchar2char(result, workspace, max_size + 1, loc); @@ -533,8 +528,8 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, dest[result_size] = '\0'; } - pfree(workspace); - pfree(result); + pg_stack_free(workspace); + pg_stack_free(result); return result_size; } @@ -607,6 +602,8 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t curr_char; size_t max_size; + DECLARE_PG_STACK_SIZE(LOCALE_STACK_SIZE); + if (srclen < 0) srclen = strlen(src); @@ -617,7 +614,7 @@ strtitle_libc_mb(char *dest, size_t destsize, 
const char *src, ssize_t srclen, errmsg("out of memory"))); /* Output workspace cannot have more codes than input bytes */ - workspace = palloc_array(wchar_t, srclen + 1); + workspace = pg_stack_alloc_array(wchar_t, srclen + 1); char2wchar(workspace, srclen + 1, src, srclen, loc); @@ -634,7 +631,7 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, * Make result large enough; case change might change number of bytes */ max_size = curr_char * pg_database_encoding_max_length(); - result = palloc(max_size + 1); + result = pg_stack_alloc(max_size + 1); result_size = wchar2char(result, workspace, max_size + 1, loc); @@ -644,8 +641,8 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, dest[result_size] = '\0'; } - pfree(workspace); - pfree(result); + pg_stack_free(workspace); + pg_stack_free(result); return result_size; } @@ -700,6 +697,8 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t curr_char; size_t max_size; + DECLARE_PG_STACK_SIZE(LOCALE_STACK_SIZE); + if (srclen < 0) srclen = strlen(src); @@ -710,7 +709,7 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, errmsg("out of memory"))); /* Output workspace cannot have more codes than input bytes */ - workspace = palloc_array(wchar_t, srclen + 1); + workspace = pg_stack_alloc_array(wchar_t, srclen + 1); char2wchar(workspace, srclen + 1, src, srclen, loc); @@ -721,7 +720,7 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, * Make result large enough; case change might change number of bytes */ max_size = curr_char * pg_database_encoding_max_length(); - result = palloc(max_size + 1); + result = pg_stack_alloc(max_size + 1); result_size = wchar2char(result, workspace, max_size + 1, loc); @@ -731,8 +730,8 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, dest[result_size] = '\0'; } - pfree(workspace); - pfree(result); + 
pg_stack_free(workspace); + pg_stack_free(result); return result_size; } @@ -896,48 +895,25 @@ int strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale) { - char sbuf[TEXTBUFLEN]; - char *buf = sbuf; - size_t bufsize1 = (len1 == -1) ? 0 : len1 + 1; - size_t bufsize2 = (len2 == -1) ? 0 : len2 + 1; - const char *arg1n; - const char *arg2n; + char *cstr1 = NULL; + char *cstr2 = NULL; int result; - if (bufsize1 + bufsize2 > TEXTBUFLEN) - buf = palloc(bufsize1 + bufsize2); + DECLARE_PG_STACK_SIZE(LOCALE_STACK_SIZE); /* nul-terminate arguments if necessary */ - if (len1 == -1) - { - arg1n = arg1; - } - else - { - char *buf1 = buf; + if (len1 != -1) + arg1 = cstr1 = pg_stack_strdup_with_len(arg1, len1); - memcpy(buf1, arg1, len1); - buf1[len1] = '\0'; - arg1n = buf1; - } + if (len2 != -1) + arg2 = cstr2 = pg_stack_strdup_with_len(arg2, len2); - if (len2 == -1) - { - arg2n = arg2; - } - else - { - char *buf2 = buf + bufsize1; - - memcpy(buf2, arg2, len2); - buf2[len2] = '\0'; - arg2n = buf2; - } + result = strcoll_l(arg1, arg2, locale->lt); - result = strcoll_l(arg1n, arg2n, locale->lt); - - if (buf != sbuf) - pfree(buf); + if (cstr1) + pg_stack_free(cstr1); + if (cstr2) + pg_stack_free(cstr2); return result; } @@ -953,25 +929,17 @@ size_t strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { - char sbuf[TEXTBUFLEN]; - char *buf = sbuf; - size_t bufsize = srclen + 1; + char *cstr; size_t result; + DECLARE_PG_STACK_SIZE(LOCALE_STACK_SIZE); + if (srclen == -1) return strxfrm_l(dest, src, destsize, locale->lt); - if (bufsize > TEXTBUFLEN) - buf = palloc(bufsize); - - /* nul-terminate argument */ - memcpy(buf, src, srclen); - buf[srclen] = '\0'; - - result = strxfrm_l(dest, buf, destsize, locale->lt); - - if (buf != sbuf) - pfree(buf); + cstr = pg_stack_strdup_with_len(src, srclen); + result = strxfrm_l(dest, cstr, destsize, locale->lt); + pg_stack_free(cstr); /* if dest is defined, it 
should be nul-terminated */ Assert(result >= destsize || dest[result] == '\0'); @@ -1057,15 +1025,13 @@ static int strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale) { - char sbuf[TEXTBUFLEN]; - char *buf = sbuf; - char *a1p, - *a2p; - int a1len; - int a2len; + wchar_t *w1p; + wchar_t *w2p; int r; int result; + DECLARE_PG_STACK_SIZE(LOCALE_STACK_SIZE); + Assert(GetDatabaseEncoding() == PG_UTF8); if (len1 == -1) @@ -1073,50 +1039,42 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2, if (len2 == -1) len2 = strlen(arg2); - a1len = len1 * 2 + 2; - a2len = len2 * 2 + 2; - - if (a1len + a2len > TEXTBUFLEN) - buf = palloc(a1len + a2len); - - a1p = buf; - a2p = buf + a1len; + w1p = pg_stack_alloc_array(wchar_t, len1 + 1); + w2p = pg_stack_alloc_array(wchar_t, len2 + 1); /* API does not work for zero-length input */ if (len1 == 0) r = 0; else { - r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1, - (LPWSTR) a1p, a1len / 2); + r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1, w1p, len1); if (!r) ereport(ERROR, (errmsg("could not convert string to UTF-16: error code %lu", GetLastError()))); } - ((LPWSTR) a1p)[r] = 0; + w1p[r] = 0; if (len2 == 0) r = 0; else { - r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2, - (LPWSTR) a2p, a2len / 2); + r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2, w2p, len2); if (!r) ereport(ERROR, (errmsg("could not convert string to UTF-16: error code %lu", GetLastError()))); } - ((LPWSTR) a2p)[r] = 0; + w2p[r] = 0; errno = 0; - result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->lt); + result = wcscoll_l(w1p, w2p, locale->lt); if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */ ereport(ERROR, (errmsg("could not compare Unicode strings: %m"))); - if (buf != sbuf) - pfree(buf); + pg_stack_free(w1p); + pg_stack_free(w2p); return result; } @@ -1289,8 +1247,12 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, else #endif /* WIN32 
*/ { + char *str; + + DECLARE_PG_STACK_SIZE(LOCALE_STACK_SIZE); + /* mbstowcs requires ending '\0' */ - char *str = pnstrdup(from, fromlen); + str = pg_stack_strdup_with_len(from, fromlen); if (loc == (locale_t) 0) { @@ -1303,7 +1265,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, result = mbstowcs_l(to, str, tolen, loc); } - pfree(str); + pg_stack_free(str); } if (result == -1) diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 444350bb803..377d47ff49d 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -17,6 +17,12 @@ /* use for libc locale names */ #define LOCALE_NAME_BUFLEN 128 +/* + * The default limit for DECLARE_PG_STACK() is very low. The code in this + * module requests a custom size for temporary string conversions. + */ +#define LOCALE_STACK_SIZE 1024 + /* * Maximum number of bytes needed to map a single codepoint. Useful for * mapping and processing a single input codepoint at a time with a -- 2.50.1 (Apple Git-155)