public inbox for [email protected]  
help / color / mirror / Atom feed
From: David Rowley <[email protected]>
To: Tom Lane <[email protected]>
Cc: PostgreSQL Developers <[email protected]>
Subject: Re: Small and unlikely overflow hazard in bms_next_member()
Date: Mon, 13 Apr 2026 01:17:02 +1200
Message-ID: <CAApHDvr1B2gbf6JF69QmueM2QNRvbQeeKLxDnF=w9f9--022uA@mail.gmail.com> (raw)
In-Reply-To: <CAApHDvqTUm3Cbgz3ZLV+ad8s_HJHZYrVbrBvGyPQdxCRR-6dvA@mail.gmail.com>
References: <CAApHDvq0T=iJ0Sf5TNE9yyWwfOeVjmrBt0wSywDnGD9Y4YJQBA@mail.gmail.com>
	<[email protected]>
	<CAApHDvrvvq_m+nRwjsOpCsFa4EtVtmvJX7zAD=Siria-x6DpbQ@mail.gmail.com>
	<CAApHDvqTUm3Cbgz3ZLV+ad8s_HJHZYrVbrBvGyPQdxCRR-6dvA@mail.gmail.com>

On Fri, 3 Apr 2026 at 15:08, David Rowley <[email protected]> wrote:
> IMO, if we can make bitmapset.c work with INT_MAX members and get a
> performance increase, then we should do it.

Re-thinking this after a week's holiday, it seems fine to use an
unsigned 32-bit int rather than a 64-bit int to fix this bug. I'd
previously been uncertain if there were any guarantees in C to what
(unsigned int) -1 would return, but going by [1] at 6.3.1.3, it says:

"Otherwise, if the new type is unsigned, the value is converted by
repeatedly adding or subtracting one more than the maximum value that
can be represented in the new type until the value is in the range of
the new type."

So, it seems even on one's complement that -1 as an unsigned int will
be UINT_MAX. When we add 1 to UINT_MAX, we're guaranteed to get 0, as
it's unsigned maths and overflows are going to result in a value
modulus the max value for the type.

That leads me to the attached v2 patch. Compiler Explorer link showing
the assembly at [2].

Testing the performance, it's better than master, so I got rid of the
size_t wordnum stuff. We're post-freeze now, so fiddling with other
optimisations seems a bit off the table as there appears to be no
performance regression to compensate for now, per:

drowley@amd3990x:~$ gcc test_bms_next3.c -O2 -o test_bms_next3 &&
./test_bms_next3
Benchmarking 100000000 bms_next_member iterations...
master: 1.18330 seconds
Patched: 1.05493 seconds

Benchmarking 100000000 bms_prev_member iterations...
master: 2.94522 seconds
Patched: 1.86130 seconds

drowley@amd3990x:~$ clang test_bms_next3.c -O2 -o test_bms_next3 &&
./test_bms_next3
Benchmarking 100000000 bms_next_member iterations...
master: 1.07860 seconds
Patched: 1.07896 seconds

Benchmarking 100000000 bms_prev_member iterations...
master: 2.76550 seconds
Patched: 2.12369 seconds

David

[1] https://www.open-std.org/jtc1/sc22/wg14/www/docs/n1548.pdf
[2] https://godbolt.org/z/xW96rxd3P

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <limits.h>

//#define NULL ((void *) 0)
typedef uint32_t uint32;
typedef int32_t int32;
typedef uint64_t uint64;
typedef int64_t int64;
#define BITS_PER_BITMAPWORD 64
typedef uint64 bitmapword;	  /* must be an unsigned type */
typedef int64 signedbitmapword; /* must be the matching signed type */

#define WORDNUM(x)  ((x) / BITS_PER_BITMAPWORD)
#define BITNUM(x)   ((x) % BITS_PER_BITMAPWORD)

#ifdef __GNUC__
#define likely(x)	__builtin_expect((x) != 0, 1)
#define unlikely(x) __builtin_expect((x) != 0, 0)
#else
#define likely(x)	((x) != 0)
#define unlikely(x) ((x) != 0)
#endif

typedef struct Bitmapset
{
	int		 nwords;		/* number of words in array */
	bitmapword  words[];	/* really [nwords] */
} Bitmapset;

static inline int
bmw_rightmost_one_pos(uint64 word)
{
	return __builtin_ctzll(word);
}

static inline int
bmw_leftmost_one_pos(uint64 word)
{
	return 63 - __builtin_clzll(word);
}

int
bms_next_member(const Bitmapset *a, int prevbit)
{
	int		 nwords;
	bitmapword  mask;

	if (a == NULL)
		return -2;

	nwords = a->nwords;
	prevbit++;
	mask = (~(bitmapword) 0) << BITNUM(prevbit);
	for (int wordnum = WORDNUM(prevbit); wordnum < nwords; wordnum++)
	{
		bitmapword  w = a->words[wordnum];

		/* ignore bits before prevbit */
		w &= mask;

		if (w != 0)
		{
			int		 result;

			result = wordnum * BITS_PER_BITMAPWORD;
			result += bmw_rightmost_one_pos(w);
			return result;
		}

		/* in subsequent words, consider all bits */
		mask = (~(bitmapword) 0);
	}
	return -2;
}

int
bms_next_member_patched(const Bitmapset *a, int prevbit)
{
	unsigned int currbit = prevbit;
	int			nwords;
	bitmapword	mask;

	if (a == NULL)
		return -2;
	nwords = a->nwords;

	/* use an unsigned int to avoid the risk that int overflows */
	currbit++;
	mask = (~(bitmapword) 0) << BITNUM(currbit);
	for (int wordnum = WORDNUM(currbit); wordnum < nwords; wordnum++)
	{
		bitmapword	w = a->words[wordnum];

		/* ignore bits before currbit */
		w &= mask;

		if (w != 0)
		{
			int			result;

			result = wordnum * BITS_PER_BITMAPWORD;
			result += bmw_rightmost_one_pos(w);
			return result;
		}

		/* in subsequent words, consider all bits */
		mask = (~(bitmapword) 0);
	}
	return -2;
}

int
bms_prev_member(const Bitmapset *a, int prevbit)
{
	int			ushiftbits;
	bitmapword	mask;

	/*
	 * If set is NULL or if there are no more bits to the right then we've
	 * nothing to do.
	 */
	if (a == NULL || prevbit == 0)
		return -2;

	/* transform -1 to the highest possible bit we could have set */
	if (prevbit == -1)
		prevbit = a->nwords * BITS_PER_BITMAPWORD - 1;
	else
		prevbit--;

	ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(prevbit) + 1);
	mask = (~(bitmapword) 0) >> ushiftbits;
	for (int wordnum = WORDNUM(prevbit); wordnum >= 0; wordnum--)
	{
		bitmapword	w = a->words[wordnum];

		/* mask out bits left of prevbit */
		w &= mask;

		if (w != 0)
		{
			int			result;

			result = wordnum * BITS_PER_BITMAPWORD;
			result += bmw_leftmost_one_pos(w);
			return result;
		}

		/* in subsequent words, consider all bits */
		mask = (~(bitmapword) 0);
	}
	return -2;
}

int
bms_prev_member_patched(const Bitmapset *a, int prevbit)
{
	unsigned int currbit;
	int			ushiftbits;
	bitmapword	mask;


	/*
	 * If set is NULL or if there are no more bits to the right then we've
	 * nothing to do.
	 */
	if (a == NULL || prevbit == 0)
		return -2;

	/*
	 * Transform -1 to the highest possible bit we could have set.  We do this
	 * in unsigned math to avoid the risk of overflowing a signed int.
	 */
	if (prevbit < 0)
		currbit = (unsigned int) a->nwords * BITS_PER_BITMAPWORD - 1;
	else
		currbit = prevbit - 1;

	ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(currbit) + 1);
	mask = (~(bitmapword) 0) >> ushiftbits;
	for (int wordnum = WORDNUM(currbit); wordnum >= 0; wordnum--)
	{
		bitmapword	w = a->words[wordnum];

		/* mask out bits left of currbit */
		w &= mask;

		if (w != 0)
		{
			int			result;

			result = wordnum * BITS_PER_BITMAPWORD;
			result += bmw_leftmost_one_pos(w);
			return result;
		}

		/* in subsequent words, consider all bits */
		mask = (~(bitmapword) 0);
	}
	return -2;
}


double get_time() {
	struct timespec ts;
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
	return ts.tv_sec + ts.tv_nsec * 1e-9;
}

Bitmapset *bms;


int main() {
	int words_to_alloc = 1; // Large set to bypass CPU cache slightly
	bms = malloc(sizeof(Bitmapset) + words_to_alloc * sizeof(bitmapword));
	bms->nwords = words_to_alloc;
	memset(bms->words, 0, words_to_alloc * sizeof(bitmapword));
	double start, end;
	int64 count = 0;

	/* Set a bit far into the set to force a long scan */
	bms->words[words_to_alloc - 1] |= 0xaf4;

	int iterations = 100000000;


	printf("Benchmarking %d bms_next_member iterations...\n", iterations);

	/* master */
	start = get_time();
	for (int i = 0; i < iterations; i++)
	{
		int j = -1;
		while ((j = bms_next_member(bms, j)) >= 0)
			count++;
	}
	end = get_time();
	printf("master: %.5f seconds\n", end - start);

	// Test David
	start = get_time();
	for (int i = 0; i < iterations; i++)
	{
		int j = -1;
		while ((j = bms_next_member_patched(bms, j)) >= 0)
			count++;
	}

	end = get_time();
	printf("Patched: %.5f seconds\n", end - start);

	printf("\nBenchmarking %d bms_prev_member iterations...\n", iterations);

	/* master */
	start = get_time();
	for (int i = 0; i < iterations; i++)
	{
		int j = -1;
		while ((j = bms_prev_member(bms, j)) >= 0)
			count++;
	}
	end = get_time();
	printf("master: %.5f seconds\n", end - start);

	// Test David
	start = get_time();
	for (int i = 0; i < iterations; i++)
	{
		int j = -1;
		while ((j = bms_prev_member_patched(bms, j)) >= 0)
			count++;
	}

	end = get_time();
	printf("Patched: %.5f seconds\n", end - start);

	printf("%ld\n", count);
	free(bms);
	return 0;
}

Attachments:

  [text/plain] test_bms_next3.c (5.7K, 2-test_bms_next3.c)
  download | inline:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <limits.h>

//#define NULL ((void *) 0)
typedef uint32_t uint32;
typedef int32_t int32;
typedef uint64_t uint64;
typedef int64_t int64;
#define BITS_PER_BITMAPWORD 64
typedef uint64 bitmapword;	  /* must be an unsigned type */
typedef int64 signedbitmapword; /* must be the matching signed type */

#define WORDNUM(x)  ((x) / BITS_PER_BITMAPWORD)
#define BITNUM(x)   ((x) % BITS_PER_BITMAPWORD)

#ifdef __GNUC__
#define likely(x)	__builtin_expect((x) != 0, 1)
#define unlikely(x) __builtin_expect((x) != 0, 0)
#else
#define likely(x)	((x) != 0)
#define unlikely(x) ((x) != 0)
#endif

typedef struct Bitmapset
{
	int		 nwords;		/* number of words in array */
	bitmapword  words[];	/* really [nwords] */
} Bitmapset;

static inline int
bmw_rightmost_one_pos(uint64 word)
{
	return __builtin_ctzll(word);
}

static inline int
bmw_leftmost_one_pos(uint64 word)
{
	return 63 - __builtin_clzll(word);
}

int
bms_next_member(const Bitmapset *a, int prevbit)
{
	int		 nwords;
	bitmapword  mask;

	if (a == NULL)
		return -2;

	nwords = a->nwords;
	prevbit++;
	mask = (~(bitmapword) 0) << BITNUM(prevbit);
	for (int wordnum = WORDNUM(prevbit); wordnum < nwords; wordnum++)
	{
		bitmapword  w = a->words[wordnum];

		/* ignore bits before prevbit */
		w &= mask;

		if (w != 0)
		{
			int		 result;

			result = wordnum * BITS_PER_BITMAPWORD;
			result += bmw_rightmost_one_pos(w);
			return result;
		}

		/* in subsequent words, consider all bits */
		mask = (~(bitmapword) 0);
	}
	return -2;
}

int
bms_next_member_patched(const Bitmapset *a, int prevbit)
{
	unsigned int currbit = prevbit;
	int			nwords;
	bitmapword	mask;

	if (a == NULL)
		return -2;
	nwords = a->nwords;

	/* use an unsigned int to avoid the risk that int overflows */
	currbit++;
	mask = (~(bitmapword) 0) << BITNUM(currbit);
	for (int wordnum = WORDNUM(currbit); wordnum < nwords; wordnum++)
	{
		bitmapword	w = a->words[wordnum];

		/* ignore bits before currbit */
		w &= mask;

		if (w != 0)
		{
			int			result;

			result = wordnum * BITS_PER_BITMAPWORD;
			result += bmw_rightmost_one_pos(w);
			return result;
		}

		/* in subsequent words, consider all bits */
		mask = (~(bitmapword) 0);
	}
	return -2;
}

int
bms_prev_member(const Bitmapset *a, int prevbit)
{
	int			ushiftbits;
	bitmapword	mask;

	/*
	 * If set is NULL or if there are no more bits to the right then we've
	 * nothing to do.
	 */
	if (a == NULL || prevbit == 0)
		return -2;

	/* transform -1 to the highest possible bit we could have set */
	if (prevbit == -1)
		prevbit = a->nwords * BITS_PER_BITMAPWORD - 1;
	else
		prevbit--;

	ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(prevbit) + 1);
	mask = (~(bitmapword) 0) >> ushiftbits;
	for (int wordnum = WORDNUM(prevbit); wordnum >= 0; wordnum--)
	{
		bitmapword	w = a->words[wordnum];

		/* mask out bits left of prevbit */
		w &= mask;

		if (w != 0)
		{
			int			result;

			result = wordnum * BITS_PER_BITMAPWORD;
			result += bmw_leftmost_one_pos(w);
			return result;
		}

		/* in subsequent words, consider all bits */
		mask = (~(bitmapword) 0);
	}
	return -2;
}

int
bms_prev_member_patched(const Bitmapset *a, int prevbit)
{
	unsigned int currbit;
	int			ushiftbits;
	bitmapword	mask;


	/*
	 * If set is NULL or if there are no more bits to the right then we've
	 * nothing to do.
	 */
	if (a == NULL || prevbit == 0)
		return -2;

	/*
	 * Transform -1 to the highest possible bit we could have set.  We do this
	 * in unsigned math to avoid the risk of overflowing a signed int.
	 */
	if (prevbit < 0)
		currbit = (unsigned int) a->nwords * BITS_PER_BITMAPWORD - 1;
	else
		currbit = prevbit - 1;

	ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(currbit) + 1);
	mask = (~(bitmapword) 0) >> ushiftbits;
	for (int wordnum = WORDNUM(currbit); wordnum >= 0; wordnum--)
	{
		bitmapword	w = a->words[wordnum];

		/* mask out bits left of currbit */
		w &= mask;

		if (w != 0)
		{
			int			result;

			result = wordnum * BITS_PER_BITMAPWORD;
			result += bmw_leftmost_one_pos(w);
			return result;
		}

		/* in subsequent words, consider all bits */
		mask = (~(bitmapword) 0);
	}
	return -2;
}


double get_time() {
	struct timespec ts;
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
	return ts.tv_sec + ts.tv_nsec * 1e-9;
}

Bitmapset *bms;


int main() {
	int words_to_alloc = 1; // Large set to bypass CPU cache slightly
	bms = malloc(sizeof(Bitmapset) + words_to_alloc * sizeof(bitmapword));
	bms->nwords = words_to_alloc;
	memset(bms->words, 0, words_to_alloc * sizeof(bitmapword));
	double start, end;
	int64 count = 0;

	/* Set a bit far into the set to force a long scan */
	bms->words[words_to_alloc - 1] |= 0xaf4;

	int iterations = 100000000;


	printf("Benchmarking %d bms_next_member iterations...\n", iterations);

	/* master */
	start = get_time();
	for (int i = 0; i < iterations; i++)
	{
		int j = -1;
		while ((j = bms_next_member(bms, j)) >= 0)
			count++;
	}
	end = get_time();
	printf("master: %.5f seconds\n", end - start);

	// Test David
	start = get_time();
	for (int i = 0; i < iterations; i++)
	{
		int j = -1;
		while ((j = bms_next_member_patched(bms, j)) >= 0)
			count++;
	}

	end = get_time();
	printf("Patched: %.5f seconds\n", end - start);

	printf("\nBenchmarking %d bms_prev_member iterations...\n", iterations);

	/* master */
	start = get_time();
	for (int i = 0; i < iterations; i++)
	{
		int j = -1;
		while ((j = bms_prev_member(bms, j)) >= 0)
			count++;
	}
	end = get_time();
	printf("master: %.5f seconds\n", end - start);

	// Test David
	start = get_time();
	for (int i = 0; i < iterations; i++)
	{
		int j = -1;
		while ((j = bms_prev_member_patched(bms, j)) >= 0)
			count++;
	}

	end = get_time();
	printf("Patched: %.5f seconds\n", end - start);

	printf("%ld\n", count);
	free(bms);
	return 0;
}

  [application/octet-stream] v2-0001-Fix-unlikely-overflow-bug-in-bms_next_member.patch (3.2K, 3-v2-0001-Fix-unlikely-overflow-bug-in-bms_next_member.patch)
  download | inline diff:
From d1c644ba4de05624fc5a63b3d5f4026c71ed4aa5 Mon Sep 17 00:00:00 2001
From: David Rowley <[email protected]>
Date: Sat, 4 Apr 2026 16:41:23 +1300
Subject: [PATCH v2] Fix unlikely overflow bug in bms_next_member()

... and bms_prev_member().

Both of these functions won't work correctly when given a prevbit of
INT_MAX.  Here we fix that by using an unsigned int to calculate which
member to look for next.

In practise, Bitmapsets will never have such a large member, so no
backpatch.

Author: David Rowley <[email protected]>
Reviewed-by: Chao Li <[email protected]>
Discussion: https://postgr.es/m/CAApHDvq0T%3DiJ0Sf5TNE9yyWwfOeVjmrBt0wSywDnGD9Y4YJQBA%40mail.gmail.com
---
 src/backend/nodes/bitmapset.c | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 786f343b3c9..957172648c3 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -1289,6 +1289,7 @@ bms_join(Bitmapset *a, Bitmapset *b)
 int
 bms_next_member(const Bitmapset *a, int prevbit)
 {
+	unsigned int currbit = prevbit;
 	int			nwords;
 	bitmapword	mask;
 
@@ -1297,13 +1298,15 @@ bms_next_member(const Bitmapset *a, int prevbit)
 	if (a == NULL)
 		return -2;
 	nwords = a->nwords;
-	prevbit++;
-	mask = (~(bitmapword) 0) << BITNUM(prevbit);
-	for (int wordnum = WORDNUM(prevbit); wordnum < nwords; wordnum++)
+
+	/* use an unsigned int to avoid the risk that int overflows */
+	currbit++;
+	mask = (~(bitmapword) 0) << BITNUM(currbit);
+	for (int wordnum = WORDNUM(currbit); wordnum < nwords; wordnum++)
 	{
 		bitmapword	w = a->words[wordnum];
 
-		/* ignore bits before prevbit */
+		/* ignore bits before currbit */
 		w &= mask;
 
 		if (w != 0)
@@ -1345,10 +1348,10 @@ bms_next_member(const Bitmapset *a, int prevbit)
  * It makes no difference in simple loop usage, but complex iteration logic
  * might need such an ability.
  */
-
 int
 bms_prev_member(const Bitmapset *a, int prevbit)
 {
+	unsigned int currbit;
 	int			ushiftbits;
 	bitmapword	mask;
 
@@ -1362,22 +1365,24 @@ bms_prev_member(const Bitmapset *a, int prevbit)
 		return -2;
 
 	/* Validate callers didn't give us something out of range */
-	Assert(prevbit <= a->nwords * BITS_PER_BITMAPWORD);
-	Assert(prevbit >= -1);
+	Assert(prevbit < 0 || prevbit <= (unsigned int) (a->nwords * BITS_PER_BITMAPWORD));
 
-	/* transform -1 to the highest possible bit we could have set */
-	if (prevbit == -1)
-		prevbit = a->nwords * BITS_PER_BITMAPWORD - 1;
+	/*
+	 * Transform -1 to the highest possible bit we could have set.  We do this
+	 * in unsigned math to avoid the risk of overflowing a signed int.
+	 */
+	if (prevbit < 0)
+		currbit = (unsigned int) a->nwords * BITS_PER_BITMAPWORD - 1;
 	else
-		prevbit--;
+		currbit = prevbit - 1;
 
-	ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(prevbit) + 1);
+	ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(currbit) + 1);
 	mask = (~(bitmapword) 0) >> ushiftbits;
-	for (int wordnum = WORDNUM(prevbit); wordnum >= 0; wordnum--)
+	for (int wordnum = WORDNUM(currbit); wordnum >= 0; wordnum--)
 	{
 		bitmapword	w = a->words[wordnum];
 
-		/* mask out bits left of prevbit */
+		/* mask out bits left of currbit */
 		w &= mask;
 
 		if (w != 0)
-- 
2.51.0



view thread (17+ messages)  latest in thread

reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected], [email protected], [email protected]
  Subject: Re: Small and unlikely overflow hazard in bms_next_member()
  In-Reply-To: <CAApHDvr1B2gbf6JF69QmueM2QNRvbQeeKLxDnF=w9f9--022uA@mail.gmail.com>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox