From dfefff9d2e28979dee92f3b61e954c3c8b1ee236 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Wed, 12 Nov 2025 14:31:24 +0700
Subject: [PATCH v7 2/4] Detect common prefix to avoid wasted work during radix
 sort

Start radix sort at the most significant byte position that has more
than one distinct byte in the input. This skips passes where radix
sort would count the distinct bytes only to find a single one, in
which case there is nothing further to do for that pass. If all
non-NULL datums turn out to be identical, skip radix sort entirely
and only run the tiebreak comparator, if one is needed. This can
give a few percent speedup for integers that have some zero upper
bytes, which is common for those that didn't arrive via abbreviation.

Reviewed-by: Chengpeng Yan <chengpeng_yan@outlook.com>
Discussion: https://postgr.es/m/CANWCAZYpGMDSSwAa18fOxJGXaPzVdyPsWpOkfCX32DWh3Qznzw@mail.gmail.com
---
 src/backend/utils/sort/tuplesort.c | 66 ++++++++++++++++++++++++++++--
 1 file changed, 62 insertions(+), 4 deletions(-)

diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 07fa83c7944..7b22546a811 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -104,6 +104,7 @@
 #include "commands/tablespace.h"
 #include "miscadmin.h"
 #include "pg_trace.h"
+#include "port/pg_bitutils.h"
 #include "storage/shmem.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
@@ -2928,10 +2929,67 @@ sort_byvalue_datum(SortTuple *data, size_t n, Tuplesortstate *state)
 	}
 	else
 	{
-		radix_sort_tuple(not_null_start,
-						 not_null_count,
-						 0,
-						 state);
+		int			common_prefix;
+		Datum		first_datum = 0;
+		Datum		common_upper_bits = 0;
+
+		/*
+		 * Compute the common prefix to skip unproductive recursion steps
+		 * during radix sort.
+		 */
+		for (SortTuple *st = not_null_start;
+			 st < not_null_start + not_null_count;
+			 st++)
+		{
+			Datum		this_datum = st->datum1;
+
+			if (st == not_null_start)
+			{
+				/*
+				 * Need to start with some value, may as well be the first
+				 * one.
+				 */
+				first_datum = this_datum;
+			}
+			else
+			{
+				/*
+				 * Accumulate bits that represent a difference from the
+				 * reference datum.
+				 */
+				common_upper_bits |= first_datum ^ this_datum;
+			}
+		}
+
+		if (common_upper_bits == 0)
+		{
+			/*
+			 * All NOT NULL tuples have the same datum, so we can skip radix
+			 * sort. Sort using the tiebreak comparator if necessary.
+			 */
+			if (state->base.onlyKey == NULL)
+			{
+				qsort_tuple(not_null_start,
+							not_null_count,
+							state->base.comparetup_tiebreak,
+							state);
+			}
+		}
+		else
+		{
+			/*
+			 * The upper bits of common_upper_bits are zero where all datums
+			 * have the same bits. The byte position of the leftmost one bit
+			 * is the byte where radix sort should start.
+			 */
+			common_prefix = SIZEOF_DATUM - 1 -
+				(pg_leftmost_one_pos64(common_upper_bits) / BITS_PER_BYTE);
+
+			radix_sort_tuple(not_null_start,
+							 not_null_count,
+							 common_prefix,
+							 state);
+		}
 	}
 }
 
-- 
2.53.0

