public inbox for [email protected]
help / color / mirror / Atom feed
From: John Naylor <[email protected]>
To: Oleg Tselebrovskiy <[email protected]>
Cc: Andrew Kim <[email protected]>
Cc: [email protected]
Subject: Re: Proposal for enabling auto-vectorization for checksum calculations
Date: Wed, 21 Jan 2026 18:13:02 +0700
Message-ID: <CANWCAZZJ1tQcwWZe4BTgv1E-+bvhe4d0LzJvXeZCFMjRtWpk-w@mail.gmail.com> (raw)
In-Reply-To: <[email protected]>
References: <[email protected]>
<CANWCAZYZQw-nzTXbx3Bk332VtY9_D7ksDsuMZ0A-iDZ53yG7Ng@mail.gmail.com>
<CAK64mnfeWLBRbMfnOsag0vGTDnT84KJzpuei40nG0OHyw4SESw@mail.gmail.com>
<CANWCAZa1b2rcvoK657SmcKwh2P2cgASQ1D-0JPj5d3LbfaAVgA@mail.gmail.com>
<CAK64mneN20+sW5WhV+r7hMVo4Rd0z11B6=3L039rWMt1wK3nPg@mail.gmail.com>
<CANWCAZZuS3sNgLRo8Z4AM=uY4zTmz=dH5D4Z9xV6K0CEuJ8Hdw@mail.gmail.com>
<CAK64mnejn9AZMYz03e7HX8Uui35PihUuOy=b+iBG=YtRKx0Log@mail.gmail.com>
<CANWCAZZ_0AQMk1HgHXHX+JaeBfy_4kzwHgTdqMptDA7zM+nm+Q@mail.gmail.com>
<CAK64mnc6jbehHv5AHc84tVFRJg4zeMiFuvPX9xZkRpq0210MFA@mail.gmail.com>
<CANWCAZY940P3wGOQAZWMLQL4MQGGyOu7WBjBEcn_gqcrr+NvAw@mail.gmail.com>
<CAK64mne_oWN9d4mf+0c_5-4Emb9kRXA-OC05OJ4F_1fVqpjzDA@mail.gmail.com>
<CANWCAZZcKYp+01u1QmkShfXVkUCCdxtJAgHT-61Vw0ALoWj47A@mail.gmail.com>
<CAK64mne=Q_4VSpJ8f4RQB-yAThd4+i-BRYMvfdGOhvwJQdYoKQ@mail.gmail.com>
<CANWCAZYg2MVbYTaczNYNC2kaPodtfB8toUfE2Mhp9kut=2wzEA@mail.gmail.com>
<CAK64mnd9NE+xE18shrf-SSx-iwMVof=2DJ2y9_fOkQ5E2Abc5g@mail.gmail.com>
<CANWCAZbjdFnBiUmrBQC5vFFy0Fnn4SJG4AkkzGpTFhovodJdYQ@mail.gmail.com>
<[email protected]>
Attached is v11 to fix headerscheck, per CI.
--
John Naylor
Amazon Web Services
Attachments:
[application/x-patch] v11-0003-Enable-autovectorizing-pg_checksum_block-with-AV.patch (14.7K, 2-v11-0003-Enable-autovectorizing-pg_checksum_block-with-AV.patch)
download | inline diff:
From 5f60e4457fa6e67b2d186895a4f3e10ac87989ec Mon Sep 17 00:00:00 2001
From: John Naylor <[email protected]>
Date: Thu, 8 Jan 2026 18:30:20 +0700
Subject: [PATCH v11 3/3] Enable autovectorizing pg_checksum_block with AVX2
runtime detection
We already rely on autovectorization for computing page checksums,
but on x86 we can get about twice the performance by annotating
pg_checksum_block() with function target attributes for AVX2,
which uses 256-bit registers.
WIP: Runtime detection is okay in checksum.c for now, but it'd be better
to refactor feature detection at some point so it's more centralized.
Co-authored-by: Matthew Sterrett <[email protected]>
Co-authored-by: Andrew Kim <[email protected]>
Reviewed-by: Oleg Tselebrovskiy <[email protected]>
Discussion: https://postgr.es/m/CA%2BvA85_5GTu%2BHHniSbvvP%2B8k3%3DxZO%3DWE84NPwiKyxztqvpfZ3Q%40mail.gmail.com
Discussion: https://postgr.es/m/20250911054220.3784-1-root%40ip-172-31-36-228.ec2.internal
---
config/c-compiler.m4 | 26 ++++
configure | 52 ++++++++
configure.ac | 9 ++
meson.build | 30 +++++
src/backend/storage/page/checksum.c | 112 +++++++++++++++++-
src/include/pg_config.h.in | 3 +
src/include/storage/checksum_block_internal.h | 42 +++++++
src/include/storage/checksum_impl.h | 48 +++-----
src/tools/pginclude/headerscheck | 2 +
9 files changed, 290 insertions(+), 34 deletions(-)
create mode 100644 src/include/storage/checksum_block_internal.h
diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 1509dbfa2ab..1f3e31fc2d3 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -613,6 +613,32 @@ fi
undefine([Ac_cachevar])dnl
])# PGAC_SSE42_CRC32_INTRINSICS
+# PGAC_AVX2_SUPPORT
+# ---------------------------
+# Check if the compiler supports AVX2 target attribute.
+# This is used for optimized checksum calculations with runtime detection.
+#
+# If AVX2 target attribute is supported, sets pgac_avx2_support.
+AC_DEFUN([PGAC_AVX2_SUPPORT],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx2_support])])dnl
+AC_CACHE_CHECK([for AVX2 target attribute support], [Ac_cachevar],
+[AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include <stdint.h>
+ #if defined(__has_attribute) && __has_attribute (target)
+ __attribute__((target("avx2")))
+ static int avx2_test(void)
+ {
+ return 0;
+ }
+ #endif],
+ [return avx2_test();])],
+ [Ac_cachevar=yes],
+ [Ac_cachevar=no])])
+if test x"$Ac_cachevar" = x"yes"; then
+ pgac_avx2_support=yes
+fi
+undefine([Ac_cachevar])dnl
+])# PGAC_AVX2_SUPPORT
+
# PGAC_AVX512_PCLMUL_INTRINSICS
# ---------------------------
# Check if the compiler supports AVX-512 carryless multiplication
diff --git a/configure b/configure
index 04eeb1a741c..72c935c5d83 100755
--- a/configure
+++ b/configure
@@ -17680,6 +17680,58 @@ $as_echo "#define HAVE_XSAVE_INTRINSICS 1" >>confdefs.h
fi
+# Check for AVX2 target and intrinsic support
+#
+if test x"$host_cpu" = x"x86_64"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5
+$as_echo_n "checking for AVX2 support... " >&6; }
+if ${pgac_cv_avx2_support+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <immintrin.h>
+ #include <stdint.h>
+ #if defined(__has_attribute) && __has_attribute (target)
+ __attribute__((target("avx2")))
+ #endif
+ static int avx2_test(void)
+ {
+ const char buf[sizeof(__m256i)];
+ __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+ accum = _mm256_add_epi32(accum, accum);
+ int result = _mm256_extract_epi32(accum, 0);
+ return (int) result;
+ }
+int
+main ()
+{
+return avx2_test();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ pgac_cv_avx2_support=yes
+else
+ pgac_cv_avx2_support=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx2_support" >&5
+$as_echo "$pgac_cv_avx2_support" >&6; }
+if test x"$pgac_cv_avx2_support" = x"yes"; then
+ pgac_avx2_support=yes
+fi
+
+ if test x"$pgac_avx2_support" = x"yes"; then
+
+$as_echo "#define USE_AVX2_WITH_RUNTIME_CHECK 1" >>confdefs.h
+
+ fi
+fi
+
# Check for AVX-512 popcount intrinsics
#
if test x"$host_cpu" = x"x86_64"; then
diff --git a/configure.ac b/configure.ac
index 13c75170f7a..c2180111044 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2089,6 +2089,15 @@ else
fi
fi
+# Check for AVX2 target and intrinsic support
+#
+if test x"$host_cpu" = x"x86_64"; then
+ PGAC_AVX2_SUPPORT()
+ if test x"$pgac_avx2_support" = x"yes"; then
+ AC_DEFINE(USE_AVX2_WITH_RUNTIME_CHECK, 1, [Define to 1 to use AVX2 instructions with a runtime check.])
+ fi
+fi
+
# Check for XSAVE intrinsics
#
PGAC_XSAVE_INTRINSICS()
diff --git a/meson.build b/meson.build
index 6d304f32fb0..61620cbe37a 100644
--- a/meson.build
+++ b/meson.build
@@ -2348,6 +2348,36 @@ int main(void)
endif
+###############################################################
+# Check whether the compiler accepts the AVX2 target attribute
+###############################################################
+
+if host_cpu == 'x86_64'
+
+ prog = '''
+#include <immintrin.h>
+#include <stdint.h>
+#if defined(__has_attribute) && __has_attribute (target)
+__attribute__((target("avx2")))
+static int avx2_test(void)
+{
+ return 0;
+}
+#endif
+
+int main(void)
+{
+ return avx2_test();
+}
+'''
+
+ if cc.links(prog, name: 'AVX2 support', args: test_c_args)
+ cdata.set('USE_AVX2_WITH_RUNTIME_CHECK', 1)
+ endif
+
+endif
+
+
###############################################################
# Check for the availability of AVX-512 popcount intrinsics.
###############################################################
diff --git a/src/backend/storage/page/checksum.c b/src/backend/storage/page/checksum.c
index 8716651c8b5..55ebe988411 100644
--- a/src/backend/storage/page/checksum.c
+++ b/src/backend/storage/page/checksum.c
@@ -13,10 +13,120 @@
*/
#include "postgres.h"
+#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
+#include <cpuid.h>
+#endif
+
+#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
+#include <intrin.h>
+#endif
+
+#ifdef HAVE_XSAVE_INTRINSICS
+#include <immintrin.h>
+#endif
+
#include "storage/checksum.h"
+
/*
* The actual code is in storage/checksum_impl.h. This is done so that
* external programs can incorporate the checksum code by #include'ing
- * that file from the exported Postgres headers. (Compare our CRC code.)
+ * that file from the exported Postgres headers. (Compare our legacy
+ * CRC code in pg_crc.h.)
+ * The PG_CHECKSUM_INTERNAL symbol allows core to use hardware-specific
+ * coding without affecting external programs.
*/
+#define PG_CHECKSUM_INTERNAL
#include "storage/checksum_impl.h" /* IWYU pragma: keep */
+
+
+/* WIP: the feature detection should go in src/port */
+
+/*
+ * Does CPUID say there's support for XSAVE instructions?
+ *
+ * Queries CPUID leaf 1 and tests bit 27 of ECX (exx[2]), the OSXSAVE flag.
+ * If neither __get_cpuid() nor __cpuid() is available at build time, exx[]
+ * keeps its zero initializer and this returns false.
+ */
+static inline bool
+xsave_available(void)
+{
+ unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID)
+ __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+ __cpuid(exx, 1);
+#endif
+ return (exx[2] & (1 << 27)) != 0; /* osxsave */
+}
+
+/*
+ * Does XGETBV say the YMM registers are enabled?
+ *
+ * NB: Caller is responsible for verifying that xsave_available() returns true
+ * before calling this.
+ *
+ * Reads extended control register 0 via _xgetbv(0) and requires bits 1 and 2
+ * (mask 0x06) to both be set. When HAVE_XSAVE_INTRINSICS is not defined
+ * this always returns false.
+ */
+#ifdef HAVE_XSAVE_INTRINSICS
+pg_attribute_target("xsave")
+#endif
+static inline bool
+ymm_regs_available(void)
+{
+#ifdef HAVE_XSAVE_INTRINSICS
+ return (_xgetbv(0) & 0x06) == 0x06;
+#else
+ return false;
+#endif
+}
+
+/*
+ * Check for AVX2 support using CPUID detection
+ *
+ * Queries CPUID leaf 7, subleaf 0 and tests bit 5 of EBX (exx[1]). If
+ * neither __get_cpuid_count() nor __cpuidex() is available at build time,
+ * exx[] keeps its zero initializer and this returns false.
+ */
+static inline bool
+avx2_available(void)
+{
+ unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID_COUNT)
+ __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUIDEX)
+ __cpuidex(exx, 7, 0);
+#endif
+
+ return (exx[1] & (1 << 5)) != 0; /* avx2 */
+}
+
+static uint32
+pg_checksum_block_fallback(const PGChecksummablePage *page)
+{
+#include "storage/checksum_block_internal.h"
+}
+
+/*
+ * AVX2-optimized block checksum algorithm.
+ *
+ * The body is the shared code from storage/checksum_block_internal.h,
+ * textually included; it is the same body pg_checksum_block_fallback()
+ * includes, and only the "avx2" target attribute on this function differs.
+ * Compiled only when USE_AVX2_WITH_RUNTIME_CHECK is defined.
+ */
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+pg_attribute_target("avx2")
+static uint32
+pg_checksum_block_avx2(const PGChecksummablePage *page)
+{
+#include "storage/checksum_block_internal.h"
+}
+#endif /* USE_AVX2_WITH_RUNTIME_CHECK */
+
+/*
+ * Choose the best available checksum implementation.
+ *
+ * This is the initial value of the pg_checksum_block function pointer. It
+ * repoints pg_checksum_block at pg_checksum_block_avx2 when the build defines
+ * USE_AVX2_WITH_RUNTIME_CHECK and xsave_available(), ymm_regs_available()
+ * and avx2_available() all return true; otherwise at
+ * pg_checksum_block_fallback. It then returns the checksum of "page"
+ * computed through the updated pointer.
+ */
+static uint32
+pg_checksum_choose(const PGChecksummablePage *page)
+{
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+ if (xsave_available() &&
+ ymm_regs_available() &&
+ avx2_available())
+ pg_checksum_block = pg_checksum_block_avx2;
+ else
+#endif
+ pg_checksum_block = pg_checksum_block_fallback;
+
+ return pg_checksum_block(page);
+}
+
+static uint32 (*pg_checksum_block) (const PGChecksummablePage *page) = pg_checksum_choose;
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 339268dc8ef..1e43e9b2bc4 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -665,6 +665,9 @@
/* Define to 1 to use AVX-512 CRC algorithms with a runtime check. */
#undef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK
+/* Define to 1 to use AVX2 instructions with a runtime check. */
+#undef USE_AVX2_WITH_RUNTIME_CHECK
+
/* Define to 1 to use AVX-512 popcount instructions with a runtime check. */
#undef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
diff --git a/src/include/storage/checksum_block_internal.h b/src/include/storage/checksum_block_internal.h
new file mode 100644
index 00000000000..b4e6987d6b5
--- /dev/null
+++ b/src/include/storage/checksum_block_internal.h
@@ -0,0 +1,42 @@
+/*-------------------------------------------------------------------------
+ *
+ * checksum_block_internal.h
+ * Core algorithm for page checksums, semi-private to checksum_impl.h
+ * and checksum.c.
+ *
+ * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/storage/checksum_block_internal.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* there is deliberately not an #ifndef CHECKSUM_BLOCK_INTERNAL_H here */
+
+uint32 sums[N_SUMS];
+uint32 result = 0;
+uint32 i,
+ j;
+
+/* ensure that the size is compatible with the algorithm */
+Assert(sizeof(PGChecksummablePage) == BLCKSZ);
+
+/* initialize partial checksums to their corresponding offsets */
+memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets));
+
+/* main checksum calculation */
+for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++)
+ for (j = 0; j < N_SUMS; j++)
+ CHECKSUM_COMP(sums[j], page->data[i][j]);
+
+/* finally add in two rounds of zeroes for additional mixing */
+for (i = 0; i < 2; i++)
+ for (j = 0; j < N_SUMS; j++)
+ CHECKSUM_COMP(sums[j], 0);
+
+/* xor fold partial checksums together */
+for (i = 0; i < N_SUMS; i++)
+ result ^= sums[i];
+
+return result;
diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h
index 5c2dcbc63e7..8a308e423c3 100644
--- a/src/include/storage/checksum_impl.h
+++ b/src/include/storage/checksum_impl.h
@@ -73,11 +73,10 @@
* 2e-16 false positive rate within margin of error.
*
* Vectorization of the algorithm requires 32bit x 32bit -> 32bit integer
- * multiplication instruction. As of 2013 the corresponding instruction is
- * available on x86 SSE4.1 extensions (pmulld) and ARM NEON (vmul.i32).
- * Vectorization requires a compiler to do the vectorization for us. For recent
- * GCC versions the flags -msse4.1 -funroll-loops -ftree-vectorize are enough
- * to achieve vectorization.
+ * multiplication instruction. Examples include x86 AVX2 extensions (vpmulld)
+ * and ARM NEON (vmul.i32). For simplicity we rely on the compiler to do the
+ * vectorization for us. For GCC and clang the flags -funroll-loops
+ * -ftree-vectorize are enough to achieve vectorization.
*
* The optimal amount of parallelism to use depends on CPU specific instruction
* latency, SIMD instruction width, throughput and the amount of registers
@@ -89,8 +88,9 @@
*
* The parallelism number 32 was chosen based on the fact that it is the
* largest state that fits into architecturally visible x86 SSE registers while
- * leaving some free registers for intermediate values. For future processors
- * with 256bit vector registers this will leave some performance on the table.
+ * leaving some free registers for intermediate values. For processors
+ * with 256bit vector registers this leaves some performance on the table.
+ *
* When vectorization is not available it might be beneficial to restructure
* the computation to calculate a subset of the columns at a time and perform
* multiple passes to avoid register spilling. This optimization opportunity
@@ -138,6 +138,9 @@ do { \
(checksum) = __tmp * FNV_PRIME ^ (__tmp >> 17); \
} while (0)
+/* Provide a static definition for external programs */
+#ifndef PG_CHECKSUM_INTERNAL
+
/*
* Block checksum algorithm. The page must be adequately aligned
* (at least on 4-byte boundary).
@@ -145,34 +148,13 @@ do { \
static uint32
pg_checksum_block(const PGChecksummablePage *page)
{
- uint32 sums[N_SUMS];
- uint32 result = 0;
- uint32 i,
- j;
-
- /* ensure that the size is compatible with the algorithm */
- Assert(sizeof(PGChecksummablePage) == BLCKSZ);
-
- /* initialize partial checksums to their corresponding offsets */
- memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets));
-
- /* main checksum calculation */
- for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++)
- for (j = 0; j < N_SUMS; j++)
- CHECKSUM_COMP(sums[j], page->data[i][j]);
-
- /* finally add in two rounds of zeroes for additional mixing */
- for (i = 0; i < 2; i++)
- for (j = 0; j < N_SUMS; j++)
- CHECKSUM_COMP(sums[j], 0);
-
- /* xor fold partial checksums together */
- for (i = 0; i < N_SUMS; i++)
- result ^= sums[i];
-
- return result;
+#include "storage/checksum_block_internal.h"
}
+#else
+static uint32 (*pg_checksum_block) (const PGChecksummablePage *page);
+#endif
+
/*
* Compute the checksum for a Postgres page.
*
diff --git a/src/tools/pginclude/headerscheck b/src/tools/pginclude/headerscheck
index 7a6755991bb..569e749b25a 100755
--- a/src/tools/pginclude/headerscheck
+++ b/src/tools/pginclude/headerscheck
@@ -154,6 +154,8 @@ do
test "$f" = src/include/catalog/syscache_ids.h && continue
test "$f" = src/include/catalog/syscache_info.h && continue
+ test "$f" = src/include/storage/checksum_block_internal.h && continue
+
# We can't make these Bison output files compilable standalone
# without using "%code require", which old Bison versions lack.
# parser/gram.h will be included by parser/gramparse.h anyway.
--
2.52.0
[application/x-patch] v11-0002-Adjust-benchmark-to-use-core-checksum.patch (1.6K, 3-v11-0002-Adjust-benchmark-to-use-core-checksum.patch)
download | inline diff:
From b6a3aba8f9568684cf22af42584bec65b6170668 Mon Sep 17 00:00:00 2001
From: John Naylor <[email protected]>
Date: Fri, 9 Jan 2026 17:07:37 +0700
Subject: [PATCH v11 2/3] Adjust benchmark to use core checksum
XXX not for commit
---
contrib/pg_checksum_bench/pg_checksum_bench.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.c b/contrib/pg_checksum_bench/pg_checksum_bench.c
index dc20395a590..61da664e723 100644
--- a/contrib/pg_checksum_bench/pg_checksum_bench.c
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.c
@@ -1,7 +1,6 @@
#include "postgres.h"
#include "fmgr.h"
-#include "port/checksum.h"
-#include "port/checksum_impl.h"
+#include "storage/checksum.h"
#include <stdio.h>
#include <assert.h>
@@ -15,23 +14,23 @@ Datum
drive_pg_checksum(PG_FUNCTION_ARGS)
{
int page_count = PG_GETARG_INT32(0);
- PGChecksummablePage *pages;
+ char *pages;
int i;
size_t j;
- pages = palloc(page_count * sizeof(PGChecksummablePage));
+ pages = palloc(page_count * BLCKSZ);
srand(0);
- for (j = 0; j < page_count * sizeof(PGChecksummablePage); j++)
+ for (j = 0; j < page_count * BLCKSZ; j++)
{
- char *byte_ptr = (char *) pages;
+ char *byte_ptr = pages;
byte_ptr[j] = rand() % 256;
}
for (i = 0; i < REPEATS; i++)
{
- const PGChecksummablePage *test_page = pages + (i % page_count);
- volatile uint32 result = pg_checksum_block_choose((const char *) test_page);
+ char *test_page = pages + (size_t) (i % page_count) * BLCKSZ; /* "pages" is char *, so step by whole pages */
+ volatile uint32 result = pg_checksum_page((char *) test_page, 0);
(void) result;
}
--
2.52.0
[application/x-patch] v11-0001-Benchmark-code-for-postgres-checksums.patch (4.9K, 4-v11-0001-Benchmark-code-for-postgres-checksums.patch)
download | inline diff:
From 97a24b6da8fddaaafc2ed434dabf14a53bd6eecb Mon Sep 17 00:00:00 2001
From: Andrew Kim <[email protected]>
Date: Wed, 5 Nov 2025 14:37:29 -0800
Subject: [PATCH v11 1/3] Benchmark code for postgres checksums
Add pg_checksum_bench extension for performance testing of checksum
implementations with AVX2 optimization.
XXX not for commit
---
contrib/meson.build | 1 +
contrib/pg_checksum_bench/meson.build | 23 ++++++++++
.../pg_checksum_bench--1.0.sql | 8 ++++
contrib/pg_checksum_bench/pg_checksum_bench.c | 42 +++++++++++++++++++
.../pg_checksum_bench.control | 4 ++
.../sql/pg_checksum_bench.sql | 17 ++++++++
6 files changed, 95 insertions(+)
create mode 100644 contrib/pg_checksum_bench/meson.build
create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench.c
create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench.control
create mode 100644 contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
diff --git a/contrib/meson.build b/contrib/meson.build
index def13257cbe..98fe47b5b9b 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -12,6 +12,7 @@ contrib_doc_args = {
'install_dir': contrib_doc_dir,
}
+subdir('pg_checksum_bench')
subdir('amcheck')
subdir('auth_delay')
subdir('auto_explain')
diff --git a/contrib/pg_checksum_bench/meson.build b/contrib/pg_checksum_bench/meson.build
new file mode 100644
index 00000000000..32ccd9efa0f
--- /dev/null
+++ b/contrib/pg_checksum_bench/meson.build
@@ -0,0 +1,23 @@
+# Copyright (c) 2022-2025, PostgreSQL Global Development Group
+
+pg_checksum_bench_sources = files(
+ 'pg_checksum_bench.c',
+)
+
+if host_system == 'windows'
+ pg_checksum_bench_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'pg_checksum_bench',
+ '--FILEDESC', 'pg_checksum_bench',])
+endif
+
+pg_checksum_bench = shared_module('pg_checksum_bench',
+ pg_checksum_bench_sources,
+ kwargs: contrib_mod_args,
+)
+contrib_targets += pg_checksum_bench
+
+install_data(
+ 'pg_checksum_bench--1.0.sql',
+ 'pg_checksum_bench.control',
+ kwargs: contrib_data_args,
+)
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql b/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
new file mode 100644
index 00000000000..5f13cbe3c5e
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
@@ -0,0 +1,8 @@
+/* contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION pg_checksum_bench" to load this file. \quit
+
+CREATE FUNCTION drive_pg_checksum(page_count int)
+ RETURNS pg_catalog.void
+ AS 'MODULE_PATHNAME' LANGUAGE C;
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.c b/contrib/pg_checksum_bench/pg_checksum_bench.c
new file mode 100644
index 00000000000..dc20395a590
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.c
@@ -0,0 +1,42 @@
+#include "postgres.h"
+#include "fmgr.h"
+#include "port/checksum.h"
+#include "port/checksum_impl.h"
+
+#include <stdio.h>
+#include <assert.h>
+
+PG_MODULE_MAGIC;
+
+#define REPEATS 1000000
+
+PG_FUNCTION_INFO_V1(drive_pg_checksum);
+Datum
+drive_pg_checksum(PG_FUNCTION_ARGS)
+{
+ int page_count = PG_GETARG_INT32(0);
+ PGChecksummablePage *pages;
+ int i;
+ size_t j;
+
+ pages = palloc(page_count * sizeof(PGChecksummablePage));
+ srand(0);
+ for (j = 0; j < page_count * sizeof(PGChecksummablePage); j++)
+ {
+ char *byte_ptr = (char *) pages;
+
+ byte_ptr[j] = rand() % 256;
+ }
+
+ for (i = 0; i < REPEATS; i++)
+ {
+ const PGChecksummablePage *test_page = pages + (i % page_count);
+ volatile uint32 result = pg_checksum_block_choose((const char *) test_page);
+
+ (void) result;
+ }
+
+ pfree((void *) pages);
+
+ PG_RETURN_VOID();
+}
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.control b/contrib/pg_checksum_bench/pg_checksum_bench.control
new file mode 100644
index 00000000000..4a4e2c9363c
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.control
@@ -0,0 +1,4 @@
+comment = 'pg_checksum benchmark'
+default_version = '1.0'
+module_pathname = '$libdir/pg_checksum_bench'
+relocatable = true
diff --git a/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql b/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
new file mode 100644
index 00000000000..4b347699953
--- /dev/null
+++ b/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
@@ -0,0 +1,17 @@
+CREATE EXTENSION pg_checksum_bench;
+
+SELECT drive_pg_checksum(-1);
+
+\timing on
+
+SELECT drive_pg_checksum(1);
+SELECT drive_pg_checksum(2);
+SELECT drive_pg_checksum(4);
+SELECT drive_pg_checksum(8);
+SELECT drive_pg_checksum(16);
+SELECT drive_pg_checksum(32);
+SELECT drive_pg_checksum(64);
+SELECT drive_pg_checksum(128);
+SELECT drive_pg_checksum(256);
+SELECT drive_pg_checksum(512);
+SELECT drive_pg_checksum(1024);
--
2.52.0
view thread (15+ messages) latest in thread
reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Reply to all the recipients using the --to and --cc options:
reply via email
To: [email protected]
Cc: [email protected], [email protected], [email protected], [email protected]
Subject: Re: Proposal for enabling auto-vectorization for checksum calculations
In-Reply-To: <CANWCAZZJ1tQcwWZe4BTgv1E-+bvhe4d0LzJvXeZCFMjRtWpk-w@mail.gmail.com>
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox