public inbox for [email protected]  
help / color / mirror / Atom feed
From: Aleksander Alekseev <[email protected]>
To: PostgreSQL Development <[email protected]>
Subject: [PATCH] Refactor *_abbrev_convert() functions
Date: Tue, 13 Jan 2026 15:34:09 +0300
Message-ID: <CAJ7c6TMPhDRQMmkUHPv8oOK97B1mR8NRS61DgjpdaZUPAwaeZQ@mail.gmail.com> (raw)

Hi,

Now when all Datums are 64-bit values we can simplify the code by
using murmurhash64(). This refactoring was previously suggested by
John Naylor [1].

[1]: https://postgr.es/m/CANWCAZbMyrijdR0xc-4SqpNJBHMEwRZccBK4fa0aquNpq2Uj7w%40mail.gmail.com

-- 
Best regards,
Aleksander Alekseev


Attachments:

  [text/x-patch] v1-0001-Refactor-_abbrev_convert-functions.patch (4.5K, 2-v1-0001-Refactor-_abbrev_convert-functions.patch)
  download | inline diff:
From 69f0407ce161af62166b13eb504477d95ed31176 Mon Sep 17 00:00:00 2001
From: Aleksander Alekseev <[email protected]>
Date: Tue, 13 Jan 2026 14:51:21 +0300
Subject: [PATCH v1] Refactor *_abbrev_convert() functions

Now when all Datums are 64-bit values we can simplify the code by using
murmurhash64().

Author: Aleksander Alekseev <[email protected]>
Suggested-by: John Naylor <[email protected]>
Reviewed-by: TODO FIXME
Discussion: TODO FIXME
---
 src/backend/utils/adt/bytea.c   |  7 +------
 src/backend/utils/adt/mac.c     | 11 +++--------
 src/backend/utils/adt/network.c |  6 +-----
 src/backend/utils/adt/numeric.c |  5 +----
 src/backend/utils/adt/uuid.c    |  6 +-----
 src/backend/utils/adt/varlena.c |  7 +------
 6 files changed, 8 insertions(+), 34 deletions(-)

diff --git a/src/backend/utils/adt/bytea.c b/src/backend/utils/adt/bytea.c
index fd7662d41ee..d880b62688c 100644
--- a/src/backend/utils/adt/bytea.c
+++ b/src/backend/utils/adt/bytea.c
@@ -1115,12 +1115,7 @@ bytea_abbrev_convert(Datum original, SortSupport ssup)
 	addHyperLogLog(&bss->full_card, hash);
 
 	/* Hash abbreviated key */
-	{
-		uint32		tmp;
-
-		tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
-		hash = DatumGetUInt32(hash_uint32(tmp));
-	}
+	hash = (uint32) murmurhash64(DatumGetUInt64(res));
 
 	addHyperLogLog(&bss->abbr_card, hash);
 
diff --git a/src/backend/utils/adt/mac.c b/src/backend/utils/adt/mac.c
index f14675dea40..0658846f274 100644
--- a/src/backend/utils/adt/mac.c
+++ b/src/backend/utils/adt/mac.c
@@ -492,17 +492,12 @@ macaddr_abbrev_convert(Datum original, SortSupport ssup)
 	uss->input_count += 1;
 
 	/*
-	 * Cardinality estimation. The estimate uses uint32, so XOR the two 32-bit
-	 * halves together to produce slightly more entropy. The two zeroed bytes
-	 * won't have any practical impact on this operation.
+	 * Cardinality estimation. The estimate uses uint32, so we hash the full
+	 * 64-bit value and take the lower 32 bits of the result.
 	 */
 	if (uss->estimating)
 	{
-		uint32		tmp;
-
-		tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
-
-		addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
+		addHyperLogLog(&uss->abbr_card, (uint32) murmurhash64(DatumGetUInt64(res)));
 	}
 
 	/*
diff --git a/src/backend/utils/adt/network.c b/src/backend/utils/adt/network.c
index 3a2002097dd..c226af5ca80 100644
--- a/src/backend/utils/adt/network.c
+++ b/src/backend/utils/adt/network.c
@@ -739,11 +739,7 @@ network_abbrev_convert(Datum original, SortSupport ssup)
 	/* Hash abbreviated key */
 	if (uss->estimating)
 	{
-		uint32		tmp;
-
-		tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
-
-		addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
+		addHyperLogLog(&uss->abbr_card, (uint32) murmurhash64(DatumGetUInt64(res)));
 	}
 
 	return res;
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 891ae6ba7fe..6d5e2a3644f 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -2397,10 +2397,7 @@ numeric_abbrev_convert_var(const NumericVar *var, NumericSortSupport *nss)
 
 	if (nss->estimating)
 	{
-		uint32		tmp = ((uint32) result
-						   ^ (uint32) ((uint64) result >> 32));
-
-		addHyperLogLog(&nss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
+		addHyperLogLog(&nss->abbr_card, (uint32) murmurhash64(result));
 	}
 
 	return NumericAbbrevGetDatum(result);
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 6ee3752ac78..888802c3012 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -396,11 +396,7 @@ uuid_abbrev_convert(Datum original, SortSupport ssup)
 
 	if (uss->estimating)
 	{
-		uint32		tmp;
-
-		tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
-
-		addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
+		addHyperLogLog(&uss->abbr_card, (uint32) murmurhash64(DatumGetUInt64(res)));
 	}
 
 	/*
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index c80191f0a22..280f2e65898 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -2115,12 +2115,7 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
 	addHyperLogLog(&sss->full_card, hash);
 
 	/* Hash abbreviated key */
-	{
-		uint32		tmp;
-
-		tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
-		hash = DatumGetUInt32(hash_uint32(tmp));
-	}
+	hash = (uint32) murmurhash64(DatumGetUInt64(res));
 
 	addHyperLogLog(&sss->abbr_card, hash);
 
-- 
2.43.0



view thread (3+ messages)  latest in thread

reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected]
  Subject: Re: [PATCH] Refactor *_abbrev_convert() functions
  In-Reply-To: <CAJ7c6TMPhDRQMmkUHPv8oOK97B1mR8NRS61DgjpdaZUPAwaeZQ@mail.gmail.com>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox