public inbox for [email protected]  
help / color / mirror / Atom feed
From: Matthias van de Meent <[email protected]>
To: PostgreSQL Hackers <[email protected]>
Subject: SQL-level pg_datum_image_equal
Date: Wed, 10 Dec 2025 18:46:03 +0100
Message-ID: <CAEze2WhsqYjg0oGY+7yooimUK7zRc9PY9u8u-Oo=VmJ+DAAkKg@mail.gmail.com> (raw)

Hi,

One of our customers has this workload where every so often they
update the whole table to make sure it's up-to-date. In general, you'd
probably want to use MERGE for such a workload and ignore all rows
that already have only matching data, but there's a catch: PostgreSQL
doesn't have an efficient way to check if the provided data is
actually equal in all senses of the word, so we can't easily and
cheaply determine whether an update is needed; which is one reason why
the full table was updated every time.

A naive approach to determining whether each value needs to be updated
would use `old IS NOT DISTINCT FROM new`, but a.) this relies on `=`
operators to exist for that type, and b.) the = operator of some types
don't always distinguish between values that are different for human
readers; with as famous example '1.0' and '1.00' in numeric; they have
an equal value but are clearly distinct to readers (and certain
functions).

One could get around this in this case by 'simply' casting to text and
comparing the outputs (using the C collation for performance and
determinism), or by wrapping it in a row (which then uses
record_image_eq, which does use binary compare functions internally),
but both imply additional parsing, wrapping, and overhead compared to
a direct datum_image_eq call.

So, attached is a simple and to-the-point patch that adds the function
mentioned in $subject, which will tell the user whether two values of
the same type have an exactly equal binary representation, using
datum_image_eq.


Kind regards,

Matthias van de Meent


Attachments:

  [application/octet-stream] v1-0001-Add-SQL-level-datum-equality-tests.patch (3.4K, 2-v1-0001-Add-SQL-level-datum-equality-tests.patch)
  download | inline diff:
From f5264368a05bb49b4ce0e4ff586fb5da6b1f5cc4 Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <[email protected]>
Date: Wed, 10 Dec 2025 18:45:19 +0100
Subject: [PATCH v1] Add SQL-level datum equality tests

This enables improved performance for users that need to test for
exact bytewise differences in SQL; e.g. to see if an UPDATE is
really necessary for externally provided values.

A workaround around this limitation was possible through various
methods, but a generic method was not available for all types.
---
 src/backend/utils/adt/pseudotypes.c      | 51 ++++++++++++++++++++++++
 src/include/catalog/pg_proc.dat          |  7 ++++
 src/test/regress/expected/opr_sanity.out |  1 +
 3 files changed, 59 insertions(+)

diff --git a/src/backend/utils/adt/pseudotypes.c b/src/backend/utils/adt/pseudotypes.c
index 317a1f2b282..216d895d2f7 100644
--- a/src/backend/utils/adt/pseudotypes.c
+++ b/src/backend/utils/adt/pseudotypes.c
@@ -23,7 +23,9 @@
 #include "postgres.h"
 
 #include "libpq/pqformat.h"
+#include "utils/datum.h"
 #include "utils/fmgrprotos.h"
+#include "utils/lsyscache.h"
 
 
 /*
@@ -375,3 +377,52 @@ PSEUDOTYPE_DUMMY_IO_FUNCS(anyelement);
 PSEUDOTYPE_DUMMY_IO_FUNCS(anynonarray);
 PSEUDOTYPE_DUMMY_IO_FUNCS(anycompatible);
 PSEUDOTYPE_DUMMY_IO_FUNCS(anycompatiblenonarray);
+
+/*
+ * Compares two datums of the same (any) type, and returns whether they have
+ * the same binary representation.
+ */
+Datum
+pg_datum_image_equal(PG_FUNCTION_ARGS)
+{
+	bool		eq;
+
+	if (PG_ARGISNULL(0) != PG_ARGISNULL(1))
+	{
+		eq = false;
+	}
+	else if (PG_ARGISNULL(0))
+	{
+		/* both NULL */
+		eq = true;
+	}
+	else
+	{
+		Oid		typ;
+		Datum	arg0;
+		Datum	arg1;
+		bool	typbyval;
+		char	typalign;
+		int16	typlen;
+
+		typ = get_fn_expr_argtype(fcinfo->flinfo, 0);
+
+		if (!OidIsValid(typ))
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("could not determine type")));
+		}
+
+		Assert(typ == get_fn_expr_argtype(fcinfo->flinfo, 1));
+
+		arg0 = PG_GETARG_DATUM(0);
+		arg1 = PG_GETARG_DATUM(1);
+
+		get_typlenbyvalalign(typ, &typlen, &typbyval, &typalign);
+
+		eq = datum_image_eq(arg0, arg1, typbyval, typlen);
+	}
+
+	PG_RETURN_BOOL(eq);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index fd9448ec7b9..2f1b1ba8370 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -12612,4 +12612,11 @@
   proargnames => '{pid,io_id,io_generation,state,operation,off,length,target,handle_data_len,raw_result,result,target_desc,f_sync,f_localmem,f_buffered}',
   prosrc => 'pg_get_aios' },
 
+{ oid => '9200',
+  descr => 'test if two values have the same binary representation',
+  proname => 'pg_datum_image_equal', proisstrict => 'f',
+  proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'anyelement anyelement',
+  prosrc => 'pg_datum_image_equal' },
+
 ]
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index a357e1d0c0e..57fe8d6ede8 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -880,6 +880,7 @@ bytea(integer)
 bytea(bigint)
 bytea_larger(bytea,bytea)
 bytea_smaller(bytea,bytea)
+pg_datum_image_equal(anyelement,anyelement)
 -- Check that functions without argument are not marked as leakproof.
 SELECT p1.oid::regprocedure
 FROM pg_proc p1 JOIN pg_namespace pn
-- 
2.50.1 (Apple Git-155)



view thread (11+ messages)  latest in thread

reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected], [email protected]
  Subject: Re: SQL-level pg_datum_image_equal
  In-Reply-To: <CAEze2WhsqYjg0oGY+7yooimUK7zRc9PY9u8u-Oo=VmJ+DAAkKg@mail.gmail.com>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox