public inbox for [email protected]  
help / color / mirror / Atom feed
From: Matthew Sterrett <[email protected]>
To: Nazir Bilal Yavuz <[email protected]>
Cc: Stepan Neretin <[email protected]>
Cc: [email protected]
Cc: Devulapalli, Raghuveer <[email protected]>
Cc: Shankaran, Akash <[email protected]>
Subject: Re: Proposal for enabling auto-vectorization for checksum calculations
Date: Thu, 22 May 2025 14:54:28 -0700
Message-ID: <CA+vA85_Er75cm7-0LWdiBbQvzk57xkOXH3jQ7bZTQiHGzjMC9Q@mail.gmail.com> (raw)
In-Reply-To: <CAN55FZ3pWRo_uettqKdAAtYyYSgrc9rnVY3H0Mfs-PHYuhPaKw@mail.gmail.com>
References: <CA+vA85_5GTu+HHniSbvvP+8k3=xZO=WE84NPwiKyxztqvpfZ3Q@mail.gmail.com>
	<CA+vA8587=RpZuL+EJ=XKM-8-xdPxz2mFexbAjaimN+OkXysXqw@mail.gmail.com>
	<CA+Yyo5RihTRUdUdanuNYhjQeXQY6412FWjzaxJAQR5MGX83=EQ@mail.gmail.com>
	<CA+Yyo5Qw=v2bcPSeyGgX3WuHeDXm7vxPcET1n+yUreUmO0rk_A@mail.gmail.com>
	<CA+vA858b4J1k_34vRR+=UsM5U4JSVc4QkigHExk0RYH9nk3iRQ@mail.gmail.com>
	<CAN55FZ3pWRo_uettqKdAAtYyYSgrc9rnVY3H0Mfs-PHYuhPaKw@mail.gmail.com>

> You can see the failure at the artifacts ->
> 'log/tmp_install/log/install.log' file on the CI web page [1].
>
> If you want to replicate that on your local:
>
> $ ./configure --with-llvm CLANG="ccache clang-16"
> $ make -s -j8 world-bin
> $ make -j8 check-world
>
> should be enough. I was able to replicate it with these commands. I
> hope these help.
Thanks so much for helping me figure this out!

Okay, I've determined that versions of LLVM/Clang before 19 crash when
compiling this patch for some reason; it seems that both make
check-world and make install will crash with the affected LLVM
versions.
Unfortunately, what matters seems to be the version of the linker/LTO
optimizer, which I don't think we can check at compile time.
I added a check for Clang >= 19, which prevents the crash on my system.
It's possible that some unusual combination of Clang/LLVM versions might
still crash during the build, but I think this is a reasonable
solution.


Attachments:

  [application/octet-stream] v4-0005-Use-dummy-function-to-avoid-linker-error-move-dec.patch (1.9K, 2-v4-0005-Use-dummy-function-to-avoid-linker-error-move-dec.patch)
  download | inline diff:
From f7b28c6378db9cc98371e89e830a2ae8a571e9a0 Mon Sep 17 00:00:00 2001
From: Matthew Sterrett <[email protected]>
Date: Mon, 19 May 2025 13:23:55 -0700
Subject: [PATCH v4 5/6] Use dummy function to avoid linker error, move
 declarations

---
 src/include/storage/checksum_impl.h | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h
index 042ee8af120..4070646e23e 100644
--- a/src/include/storage/checksum_impl.h
+++ b/src/include/storage/checksum_impl.h
@@ -163,7 +163,7 @@ ymm_regs_available(void)
 static inline bool
 avx2_available(void)
 {
-#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+#if defined (USE_AVX2_WITH_RUNTIME_CHECK) && defined(__x86_64__)
 	unsigned int exx[4] = {0, 0, 0, 0};
 	if (!xsave_available() || !ymm_regs_available()) return false;
 
@@ -220,7 +220,20 @@ do { \
 static uint32 \
 pg_checksum_block_##ISANAME(const PGChecksummablePage *page);
 
+#define PG_DEFINE_CHECKSUM_DUMMY(ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page); \
+pg_attribute_target(#ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
+{ \
+	Assert(false); /* This function should never be called */ \
+	return pg_checksum_block_default(page); /* Just in case it somehow is */ \
+}
+
 #define PG_DEFINE_CHECKSUM_ISA(ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page); \
 pg_attribute_target(#ISANAME) \
 static uint32 \
 pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
@@ -254,10 +267,11 @@ pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
 	return result; \
 }
 
-/* Declarations are always defined to make dynamic dispatch code simpler */
 PG_DEFINE_CHECKSUM_ISA(default);
 #ifdef USE_AVX2_WITH_RUNTIME_CHECK
 PG_DEFINE_CHECKSUM_ISA(avx2);
+#else
+PG_DEFINE_CHECKSUM_DUMMY(avx2);
 #endif
 
 static uint32
-- 
2.43.0



  [application/octet-stream] v4-0004-fix-bench-compiling.patch (692B, 3-v4-0004-fix-bench-compiling.patch)
  download | inline diff:
From 78873aec488678accbed1c4d1fc7ce15d12df303 Mon Sep 17 00:00:00 2001
From: Stepan Neretin <[email protected]>
Date: Sat, 10 May 2025 17:57:47 +0700
Subject: [PATCH v4 4/6] fix bench compiling

---
 contrib/pg_checksum_bench/pg_checksum_bench.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.c b/contrib/pg_checksum_bench/pg_checksum_bench.c
index f40f335ff59..0296dcfd259 100644
--- a/contrib/pg_checksum_bench/pg_checksum_bench.c
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.c
@@ -1,5 +1,6 @@
 #include "postgres.h"
 #include "fmgr.h"
+#include "storage/checksum.h"
 #include "storage/checksum_impl.h"
 
 #include <stdio.h>
-- 
2.43.0



  [application/octet-stream] v4-0002-Fix-compilation-on-systems-where-immintrin.h-is-n.patch (1.1K, 4-v4-0002-Fix-compilation-on-systems-where-immintrin.h-is-n.patch)
  download | inline diff:
From 30d916546860d3d19f8d91a0ab4bc99f9c350aaa Mon Sep 17 00:00:00 2001
From: Stepan Neretin <[email protected]>
Date: Sat, 10 May 2025 16:37:13 +0700
Subject: [PATCH v4 2/6] Fix compilation on systems where <immintrin.h> is not
 available or inappropriate, such as older GCC versions or non-x86 platforms.

---
 src/include/storage/checksum_impl.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h
index 5ea1f698b57..042ee8af120 100644
--- a/src/include/storage/checksum_impl.h
+++ b/src/include/storage/checksum_impl.h
@@ -112,7 +112,9 @@
 #include <cpuid.h>
 #endif
 
+#ifdef HAVE_XSAVE_INTRINSICS
 #include <immintrin.h>
+#endif
 
 #if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
 #include <intrin.h>
@@ -253,10 +255,6 @@ pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
 }
 
 /* Declarations are always defined to make dynamic dispatch code simpler */
-
-PG_DECLARE_CHECKSUM_ISA(default);
-PG_DECLARE_CHECKSUM_ISA(avx2);
-
 PG_DEFINE_CHECKSUM_ISA(default);
 #ifdef USE_AVX2_WITH_RUNTIME_CHECK
 PG_DEFINE_CHECKSUM_ISA(avx2);
-- 
2.43.0



  [application/octet-stream] v4-0001-Enable-autovectorizing-pg_checksum_block.patch (10.5K, 5-v4-0001-Enable-autovectorizing-pg_checksum_block.patch)
  download | inline diff:
From 58122ebe1bbebc70da90c7ec3e77f3c3d524aa85 Mon Sep 17 00:00:00 2001
From: Matthew Sterrett <[email protected]>
Date: Fri, 7 Mar 2025 11:33:45 -0800
Subject: [PATCH v4 1/6] Enable autovectorizing pg_checksum_block

---
 config/c-compiler.m4                |  31 ++++++
 configure                           |  52 ++++++++++
 configure.ac                        |   9 ++
 meson.build                         |  28 ++++++
 src/include/pg_config.h.in          |   3 +
 src/include/storage/checksum_impl.h | 150 +++++++++++++++++++++++-----
 6 files changed, 250 insertions(+), 23 deletions(-)

diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 5f3e1d1faf9..4d5bceafe9e 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -710,6 +710,37 @@ fi
 undefine([Ac_cachevar])dnl
 ])# PGAC_XSAVE_INTRINSICS
 
+# PGAC_AVX2_SUPPORT
+# -----------------------------
+# Check if the compiler supports AVX2 in attribute((target))
+# and using AVX2 intrinsics in those functions
+#
+# If the intrinsics are supported, sets pgac_avx2_support.
+AC_DEFUN([PGAC_AVX2_SUPPORT],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx2_support])])dnl
+AC_CACHE_CHECK([for AVX2 support], [Ac_cachevar],
+[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>
+    #include <stdint.h>
+    #if defined(__has_attribute) && __has_attribute (target)
+    __attribute__((target("avx2")))
+    #endif
+    static int avx2_test(void)
+    {
+      const char buf@<:@sizeof(__m256i)@:>@;
+      __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+	  accum = _mm256_add_epi32(accum, accum);
+      int result = _mm256_extract_epi32(accum, 0);
+      return (int) result;
+    }],
+  [return avx2_test();])],
+  [Ac_cachevar=yes],
+  [Ac_cachevar=no])])
+if test x"$Ac_cachevar" = x"yes"; then
+  pgac_avx2_support=yes
+fi
+undefine([Ac_cachevar])dnl
+])# PGAC_AVX2_SUPPORT
+
 # PGAC_AVX512_POPCNT_INTRINSICS
 # -----------------------------
 # Check if the compiler supports the AVX-512 popcount instructions using the
diff --git a/configure b/configure
index 4f15347cc95..0b328dc22db 100755
--- a/configure
+++ b/configure
@@ -17724,6 +17724,58 @@ $as_echo "#define HAVE_XSAVE_INTRINSICS 1" >>confdefs.h
 
 fi
 
+# Check for AVX2 target and intrinsic support
+#
+if test x"$host_cpu" = x"x86_64"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5
+$as_echo_n "checking for AVX2 support... " >&6; }
+if ${pgac_cv_avx2_support+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <immintrin.h>
+    #include <stdint.h>
+    #if defined(__has_attribute) && __has_attribute (target)
+    __attribute__((target("avx2")))
+    #endif
+    static int avx2_test(void)
+    {
+      const char buf[sizeof(__m256i)];
+      __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+	  accum = _mm256_add_epi32(accum, accum);
+      int result = _mm256_extract_epi32(accum, 0);
+      return (int) result;
+    }
+int
+main ()
+{
+return avx2_test();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_avx2_support=yes
+else
+  pgac_cv_avx2_support=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx2_support" >&5
+$as_echo "$pgac_cv_avx2_support" >&6; }
+if test x"$pgac_cv_avx2_support" = x"yes"; then
+  pgac_avx2_support=yes
+fi
+
+  if test x"$pgac_avx2_support" = x"yes"; then
+
+$as_echo "#define USE_AVX2_WITH_RUNTIME_CHECK 1" >>confdefs.h
+
+  fi
+fi
+
 # Check for AVX-512 popcount intrinsics
 #
 if test x"$host_cpu" = x"x86_64"; then
diff --git a/configure.ac b/configure.ac
index 4b8335dc613..b980429282f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2089,6 +2089,15 @@ if test x"$pgac_xsave_intrinsics" = x"yes"; then
   AC_DEFINE(HAVE_XSAVE_INTRINSICS, 1, [Define to 1 if you have XSAVE intrinsics.])
 fi
 
+# Check for AVX2 target and intrinsic support
+#
+if test x"$host_cpu" = x"x86_64"; then
+  PGAC_AVX2_SUPPORT()
+  if test x"$pgac_avx2_support" = x"yes"; then
+    AC_DEFINE(USE_AVX2_WITH_RUNTIME_CHECK, 1, [Define to 1 to use AVX2 instructions with a runtime check.])
+  fi
+fi
+
 # Check for AVX-512 popcount intrinsics
 #
 if test x"$host_cpu" = x"x86_64"; then
diff --git a/meson.build b/meson.build
index d142e3e408b..e0ab1f9cf2f 100644
--- a/meson.build
+++ b/meson.build
@@ -2301,6 +2301,34 @@ int main(void)
 
 endif
 
+###############################################################
+# Check for the availability of AVX2 support
+###############################################################
+
+if host_cpu == 'x86_64'
+
+  prog = '''
+#include <immintrin.h>
+#include <stdint.h>
+#if defined(__has_attribute) && __has_attribute (target)
+__attribute__((target("avx2")))
+#endif
+int main(void)
+{
+  const char buf[sizeof(__m256i)];
+  __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+  accum = _mm256_add_epi32(accum, accum);
+  int result = _mm256_extract_epi32(accum, 0);
+  return (int) result;
+}
+'''
+
+  if cc.links(prog, name: 'AVX2 support', args: test_c_args)
+    cdata.set('USE_AVX2_WITH_RUNTIME_CHECK', 1)
+  endif
+
+endif
+
 
 ###############################################################
 # Check for the availability of AVX-512 popcount intrinsics.
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 726a7c1be1f..34fa398ee8c 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -672,6 +672,9 @@
 /* Define to 1 to use AVX-512 CRC algorithms with a runtime check. */
 #undef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK
 
+/* Define to 1 to use AVX2 instructions with a runtime check. */
+#undef USE_AVX2_WITH_RUNTIME_CHECK
+
 /* Define to 1 to use AVX-512 popcount instructions with a runtime check. */
 #undef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
 
diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h
index da87d61ba52..5ea1f698b57 100644
--- a/src/include/storage/checksum_impl.h
+++ b/src/include/storage/checksum_impl.h
@@ -101,12 +101,83 @@
  */
 
 #include "storage/bufpage.h"
+#include "pg_config.h"
 
 /* number of checksums to calculate in parallel */
 #define N_SUMS 32
 /* prime multiplier of FNV-1a hash */
 #define FNV_PRIME 16777619
 
+#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
+#include <cpuid.h>
+#endif
+
+#include <immintrin.h>
+
+#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
+#include <intrin.h>
+#endif
+
+/*
+ * Does CPUID say there's support for XSAVE instructions?
+ */
+static inline bool
+xsave_available(void)
+{
+	unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+	__cpuid(exx, 1);
+#else
+#error cpuid instruction not available
+#endif
+	return (exx[2] & (1 << 27)) != 0;	/* osxsave */
+}
+
+/*
+ * Does XGETBV say the YMM registers are enabled?
+ *
+ * NB: Caller is responsible for verifying that xsave_available() returns true
+ * before calling this.
+ */
+#ifdef HAVE_XSAVE_INTRINSICS
+pg_attribute_target("xsave")
+#endif
+static inline bool
+ymm_regs_available(void)
+{
+#ifdef HAVE_XSAVE_INTRINSICS
+	return (_xgetbv(0) & 0x06) == 0x06;
+#else
+	return false;
+#endif
+}
+
+/*
+ * Does CPUID say there's support for AVX-2
+ */
+static inline bool
+avx2_available(void)
+{
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+	unsigned int exx[4] = {0, 0, 0, 0};
+	if (!xsave_available() || !ymm_regs_available()) return false;
+
+#if defined(HAVE__GET_CPUID_COUNT)
+	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUIDEX)
+	__cpuidex(exx, 7, 0);
+#else
+#error cpuid instruction not available
+#endif
+	return (exx[1] & (1 << 5)) != 0; /* avx2 */
+#else
+	return false;
+#endif
+}
+
 /* Use a union so that this code is valid under strict aliasing */
 typedef union
 {
@@ -142,35 +213,68 @@ do { \
  * Block checksum algorithm.  The page must be adequately aligned
  * (at least on 4-byte boundary).
  */
-static uint32
-pg_checksum_block(const PGChecksummablePage *page)
-{
-	uint32		sums[N_SUMS];
-	uint32		result = 0;
-	uint32		i,
-				j;
+ 
+#define PG_DECLARE_CHECKSUM_ISA(ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page);
+
+#define PG_DEFINE_CHECKSUM_ISA(ISANAME) \
+pg_attribute_target(#ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
+{ \
+	uint32		sums[N_SUMS]; \
+	uint32		result = 0; \
+	uint32		i, \
+				j; \
+ \
+	/* ensure that the size is compatible with the algorithm */ \
+	Assert(sizeof(PGChecksummablePage) == BLCKSZ); \
+ \
+	/* initialize partial checksums to their corresponding offsets */ \
+	memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets)); \
+ \
+	/* main checksum calculation */ \
+	/* this is the main place that autovectorization occurs */ \
+	for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++) \
+		for (j = 0; j < N_SUMS; j++) \
+			CHECKSUM_COMP(sums[j], page->data[i][j]); \
+ \
+	/* finally add in two rounds of zeroes for additional mixing */ \
+	for (i = 0; i < 2; i++) \
+		for (j = 0; j < N_SUMS; j++) \
+			CHECKSUM_COMP(sums[j], 0); \
+ \
+	/* xor fold partial checksums together */ \
+	for (i = 0; i < N_SUMS; i++) \
+		result ^= sums[i]; \
+ \
+	return result; \
+}
 
-	/* ensure that the size is compatible with the algorithm */
-	Assert(sizeof(PGChecksummablePage) == BLCKSZ);
+/* Declarations are always defined to make dynamic dispatch code simpler */
 
-	/* initialize partial checksums to their corresponding offsets */
-	memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets));
+PG_DECLARE_CHECKSUM_ISA(default);
+PG_DECLARE_CHECKSUM_ISA(avx2);
 
-	/* main checksum calculation */
-	for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++)
-		for (j = 0; j < N_SUMS; j++)
-			CHECKSUM_COMP(sums[j], page->data[i][j]);
+PG_DEFINE_CHECKSUM_ISA(default);
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+PG_DEFINE_CHECKSUM_ISA(avx2);
+#endif
 
-	/* finally add in two rounds of zeroes for additional mixing */
-	for (i = 0; i < 2; i++)
-		for (j = 0; j < N_SUMS; j++)
-			CHECKSUM_COMP(sums[j], 0);
+static uint32
+pg_checksum_block_dispatch(const PGChecksummablePage *page);
 
-	/* xor fold partial checksums together */
-	for (i = 0; i < N_SUMS; i++)
-		result ^= sums[i];
+static uint32 (*pg_checksum_block)(const PGChecksummablePage *page) = pg_checksum_block_dispatch;
 
-	return result;
+static uint32
+pg_checksum_block_dispatch(const PGChecksummablePage *page){
+	if (avx2_available()){
+		pg_checksum_block = pg_checksum_block_avx2;
+	}else{
+		pg_checksum_block = pg_checksum_block_default;
+	}
+	return pg_checksum_block(page);
 }
 
 /*
-- 
2.43.0



  [application/octet-stream] v4-0003-Benchmark-code-for-postgres-checksums.patch (4.7K, 6-v4-0003-Benchmark-code-for-postgres-checksums.patch)
  download | inline diff:
From cd6dc517b897faf3d960fafe8ea6b2cf9bbc2e05 Mon Sep 17 00:00:00 2001
From: Matthew Sterrett <[email protected]>
Date: Fri, 7 Mar 2025 11:33:27 -0800
Subject: [PATCH v4 3/6] Benchmark code for postgres checksums

---
 contrib/meson.build                           |  1 +
 contrib/pg_checksum_bench/meson.build         | 23 +++++++++++++
 .../pg_checksum_bench--1.0.sql                |  8 +++++
 contrib/pg_checksum_bench/pg_checksum_bench.c | 34 +++++++++++++++++++
 .../pg_checksum_bench.control                 |  4 +++
 .../sql/pg_checksum_bench.sql                 | 17 ++++++++++
 6 files changed, 87 insertions(+)
 create mode 100644 contrib/pg_checksum_bench/meson.build
 create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
 create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench.c
 create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench.control
 create mode 100644 contrib/pg_checksum_bench/sql/pg_checksum_bench.sql

diff --git a/contrib/meson.build b/contrib/meson.build
index ed30ee7d639..fe5149aadff 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -12,6 +12,7 @@ contrib_doc_args = {
   'install_dir': contrib_doc_dir,
 }
 
+subdir('pg_checksum_bench')
 subdir('amcheck')
 subdir('auth_delay')
 subdir('auto_explain')
diff --git a/contrib/pg_checksum_bench/meson.build b/contrib/pg_checksum_bench/meson.build
new file mode 100644
index 00000000000..32ccd9efa0f
--- /dev/null
+++ b/contrib/pg_checksum_bench/meson.build
@@ -0,0 +1,23 @@
+# Copyright (c) 2022-2025, PostgreSQL Global Development Group
+
+pg_checksum_bench_sources = files(
+  'pg_checksum_bench.c',
+)
+
+if host_system == 'windows'
+  pg_checksum_bench_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+    '--NAME', 'pg_checksum_bench',
+    '--FILEDESC', 'pg_checksum_bench',])
+endif
+
+pg_checksum_bench = shared_module('pg_checksum_bench',
+  pg_checksum_bench_sources,
+  kwargs: contrib_mod_args,
+)
+contrib_targets += pg_checksum_bench
+
+install_data(
+  'pg_checksum_bench--1.0.sql',
+  'pg_checksum_bench.control',
+  kwargs: contrib_data_args,
+)
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql b/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
new file mode 100644
index 00000000000..5f13cbe3c5e
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
@@ -0,0 +1,8 @@
+/* contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+-- \echo Use "CREATE EXTENSION pg_checksum_bench" to load this file. \quit
+
+CREATE FUNCTION drive_pg_checksum(page_count int)
+	RETURNS pg_catalog.void
+	AS 'MODULE_PATHNAME' LANGUAGE C;
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.c b/contrib/pg_checksum_bench/pg_checksum_bench.c
new file mode 100644
index 00000000000..f40f335ff59
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.c
@@ -0,0 +1,34 @@
+#include "postgres.h"
+#include "fmgr.h"
+#include "storage/checksum_impl.h"
+
+#include <stdio.h>
+#include <assert.h>
+
+PG_MODULE_MAGIC;
+
+#define REPEATS 1000000
+
+PG_FUNCTION_INFO_V1(drive_pg_checksum);
+Datum
+drive_pg_checksum(PG_FUNCTION_ARGS)
+{
+	int page_count = PG_GETARG_INT32(0);
+
+	PGChecksummablePage * pages = palloc(page_count * sizeof(PGChecksummablePage));
+	srand(0);
+	for (size_t i = 0; i < page_count * sizeof(PGChecksummablePage); i++){
+		char * byte_ptr = (char *) pages;
+		byte_ptr[i] = rand() % 256;
+	}
+
+	for (int i = 0; i < REPEATS; i++){
+		const PGChecksummablePage * test_page = pages + (i % page_count);
+		volatile uint32 result = pg_checksum_block(test_page);
+		(void) result;
+	}
+
+	pfree((void *) pages);
+
+	PG_RETURN_VOID();
+}
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.control b/contrib/pg_checksum_bench/pg_checksum_bench.control
new file mode 100644
index 00000000000..4a4e2c9363c
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.control
@@ -0,0 +1,4 @@
+comment = 'pg_checksum benchmark'
+default_version = '1.0'
+module_pathname = '$libdir/pg_checksum_bench'
+relocatable = true
diff --git a/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql b/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
new file mode 100644
index 00000000000..4b347699953
--- /dev/null
+++ b/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
@@ -0,0 +1,17 @@
+CREATE EXTENSION pg_checksum_bench;
+
+SELECT drive_pg_checksum(-1);
+
+\timing on
+
+SELECT drive_pg_checksum(1);
+SELECT drive_pg_checksum(2);
+SELECT drive_pg_checksum(4);
+SELECT drive_pg_checksum(8);
+SELECT drive_pg_checksum(16);
+SELECT drive_pg_checksum(32);
+SELECT drive_pg_checksum(64);
+SELECT drive_pg_checksum(128);
+SELECT drive_pg_checksum(256);
+SELECT drive_pg_checksum(512);
+SELECT drive_pg_checksum(1024);
-- 
2.43.0



  [application/octet-stream] v4-0006-Workaround-for-clang-19-crash.patch (2.0K, 7-v4-0006-Workaround-for-clang-19-crash.patch)
  download | inline diff:
From 5fbb374a397d22febcee97b08564967afaacf37e Mon Sep 17 00:00:00 2001
From: Matthew Sterrett <[email protected]>
Date: Wed, 21 May 2025 16:00:22 -0700
Subject: [PATCH v4 6/6] Workaround for clang<19 crash

---
 src/include/storage/checksum_impl.h | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h
index 4070646e23e..81dfecff17d 100644
--- a/src/include/storage/checksum_impl.h
+++ b/src/include/storage/checksum_impl.h
@@ -132,7 +132,7 @@ xsave_available(void)
 	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
 #elif defined(HAVE__CPUID)
 	__cpuid(exx, 1);
-#else
+#elif defined(__x86_64__)
 #error cpuid instruction not available
 #endif
 	return (exx[2] & (1 << 27)) != 0;	/* osxsave */
@@ -215,15 +215,10 @@ do { \
  * Block checksum algorithm.  The page must be adequately aligned
  * (at least on 4-byte boundary).
  */
- 
-#define PG_DECLARE_CHECKSUM_ISA(ISANAME) \
-static uint32 \
-pg_checksum_block_##ISANAME(const PGChecksummablePage *page);
 
 #define PG_DEFINE_CHECKSUM_DUMMY(ISANAME) \
 static uint32 \
 pg_checksum_block_##ISANAME(const PGChecksummablePage *page); \
-pg_attribute_target(#ISANAME) \
 static uint32 \
 pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
 { \
@@ -267,6 +262,9 @@ pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
 	return result; \
 }
 
+/* Older Clang versions crash during LTO with this code */
+#if !(__clang_major__) || __clang_major__ >= 19
+
 PG_DEFINE_CHECKSUM_ISA(default);
 #ifdef USE_AVX2_WITH_RUNTIME_CHECK
 PG_DEFINE_CHECKSUM_ISA(avx2);
@@ -289,6 +287,15 @@ pg_checksum_block_dispatch(const PGChecksummablePage *page){
 	return pg_checksum_block(page);
 }
 
+#else
+/* The same as before the patch, for the crashing Clang versions */
+PG_DEFINE_CHECKSUM_ISA(default);
+static uint32 pg_checksum_block(const PGChecksummablePage *page) {
+	return pg_checksum_block_default(page);
+}
+
+#endif
+
 /*
  * Compute the checksum for a Postgres page.
  *
-- 
2.43.0



view thread (36+ messages)  latest in thread

reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected], [email protected], [email protected], [email protected], [email protected], [email protected]
  Subject: Re: Proposal for enabling auto-vectorization for checksum calculations
  In-Reply-To: <CA+vA85_Er75cm7-0LWdiBbQvzk57xkOXH3jQ7bZTQiHGzjMC9Q@mail.gmail.com>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox