public inbox for [email protected]
help / color / mirror / Atom feedFrom: David Rowley <[email protected]>
To: Chao Li <[email protected]>
Cc: PostgreSQL Developers <[email protected]>
Subject: Re: Add bms_offset_members() function for bitshifting Bitmapsets
Date: Wed, 15 Apr 2026 11:45:12 +1200
Message-ID: <CAApHDvoALVPgVSCuGdAfNUbxx_NacnAd0nuj8Hi=_-61AEqzaQ@mail.gmail.com> (raw)
In-Reply-To: <[email protected]>
References: <CAApHDvq=eEdw2Qp+aSzSOtTSe+h0fnVQ55CcTNqBkLDYiRZmxw@mail.gmail.com>
<[email protected]>
Thanks for looking.
On Tue, 14 Apr 2026 at 20:46, Chao Li <[email protected]> wrote:
> + if (((uint64) new_nwords - 1) * BITS_PER_BITMAPWORD + high_bit + offset_bits > PG_INT32_MAX)
> + elog(ERROR, "bitmapset overflow");
> This overflow check seems wrong. Because when high_bit + offset_bits > BITS_PER_BITMAPWORD, new_nwords has been increased by 1, so there high_bit + offset_bits are double counted.
Your idea of checking the old highest member plus the offset seems a
more robust method, so I've adjusted the patch to use that.
David
Attachments:
[application/octet-stream] v2-0001-Introduce-bms_offset_members-function.patch (17.9K, 2-v2-0001-Introduce-bms_offset_members-function.patch)
download | inline diff:
From c6dc1ef00d597d13e79df60eab320dc35fee7336 Mon Sep 17 00:00:00 2001
From: David Rowley <[email protected]>
Date: Fri, 20 Mar 2026 11:01:13 +1300
Subject: [PATCH v2] Introduce bms_offset_members() function
Effectively a function to bitshift members by the specified number of
bits. We have various fragments of code doing this manually with a
bms_next_member() loop. We can do this more efficiently with
bitshifting.
---
src/backend/nodes/bitmapset.c | 160 ++++++++++++++++++
src/backend/optimizer/plan/setrefs.c | 10 +-
src/backend/optimizer/prep/prepjointree.c | 28 +--
src/backend/rewrite/rewriteManip.c | 8 +-
src/backend/statistics/extended_stats.c | 12 +-
src/include/nodes/bitmapset.h | 1 +
.../expected/test_bitmapset.out | 38 +++++
.../test_bitmapset/sql/test_bitmapset.sql | 13 ++
.../test_bitmapset/test_bitmapset--1.0.sql | 8 +
.../modules/test_bitmapset/test_bitmapset.c | 87 +++++++++-
10 files changed, 326 insertions(+), 39 deletions(-)
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index f053d8c4d64..458f8f69c51 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -991,6 +991,166 @@ bms_replace_members(Bitmapset *a, const Bitmapset *b)
return a;
}
+/*
+ * bms_offset_members
+ * Adjust all existing members of the given set by adding 'offset' to
+ * each member. The offset value may be negative or positive. Members
+ * which would become negative as a result of a negative offset will be
+ * removed from the set, whereas too large an offset which would result
+ * in a member going > INT_MAX will result in an ERROR.
+ *
+ * The set is modified in-place when possible, but callers must assume the set
+ * can be reallocated and use the returned set.
+ */
+Bitmapset *
+bms_offset_members(Bitmapset *a, int offset)
+{
+ int offset_words;
+ int offset_bits;
+ int new_nwords;
+ int old_nwords;
+ int32 high_bit;
+
+ Assert(bms_is_valid_set(a));
+
+ /* Don't go to any effort if there's nothing to do */
+ if (a == NULL)
+ return NULL;
+
+ old_nwords = a->nwords;
+ offset_words = WORDNUM(offset);
+ offset_bits = BITNUM(offset);
+ new_nwords = a->nwords + offset_words;
+ high_bit = bmw_leftmost_one_pos(a->words[a->nwords - 1]);
+
+ /* Adjust the number of words in the new set to how many we'll need */
+ new_nwords += (offset_bits + high_bit) >= BITS_PER_BITMAPWORD;
+ new_nwords -= (offset_bits + high_bit) < 0;
+
+ /* If we don't need any words then it must be an empty set */
+ if (new_nwords <= 0)
+ {
+ pfree(a);
+ return NULL;
+ }
+
+ /* Handle a positive offset (bitshift left) */
+ if (offset > 0)
+ {
+ int old_highest = (old_nwords - 1) * BITS_PER_BITMAPWORD + high_bit;
+
+ /*
+ * An unlikely scenario, but check we're not going to create a member
+ * greater than PG_INT32_MAX.
+ */
+ if ((uint64) old_highest + offset > PG_INT32_MAX)
+ elog(ERROR, "bitmapset overflow");
+
+ /*
+ * Reallocate the memory for the set if we need more words, otherwise
+ * we reuse the existing set.
+ */
+ if (new_nwords > a->nwords)
+ a = (Bitmapset *) repalloc0(a, BITMAPSET_SIZE(old_nwords),
+ BITMAPSET_SIZE(new_nwords));
+
+ a->nwords = new_nwords;
+
+ /* Shift words up to higher array elements, if needed */
+ if (offset_words > 0)
+ {
+ for (int i = old_nwords - 1; i >= 0; i--)
+ a->words[i + offset_words] = a->words[i];
+
+ /* zero out the old members from the lower, now unused words */
+ for (int i = 0; i < offset_words; i++)
+ a->words[i] = 0;
+ }
+
+ /* Now handle shifting up the bits, if needed */
+ if (offset_bits > 0)
+ {
+ int carry_bits = BITS_PER_BITMAPWORD - offset_bits;
+ bitmapword prev_carry = 0;
+
+ for (int i = offset_words; i < new_nwords; i++)
+ {
+ bitmapword carry = (a->words[i] >> carry_bits);
+
+ /* shift bits up and carry the bits from the previous word */
+ a->words[i] = (a->words[i] << offset_bits) | prev_carry;
+ prev_carry = carry;
+ }
+ }
+ }
+
+ /* Handle negative offset (bitshift right) */
+ else
+ {
+ /* Make the negative shift_words and shift_bits positive numbers */
+ offset_words = 0 - offset_words;
+ offset_bits = 0 - offset_bits;
+
+ /*
+ * Shrink the set to store the new number of words. We can't repalloc
+ * here as we need to still access the memory for the upper words to
+ * move them. In any case, repalloc with a smaller allocation likely
+ * is just going to return the same chunk, so doing so would just be
+ * needless overhead.
+ */
+ a->nwords = new_nwords;
+
+ /* Shift words down to lower elements, if needed */
+ if (offset_words > 0)
+ {
+ for (int i = 0; i <= new_nwords; i++)
+ a->words[i] = (i + offset_words) >= old_nwords ? 0 : a->words[i + offset_words];
+
+ /*
+ * Don't bother with zeroing the upper words. They're no longer
+ * visible due to reducing nwords down to the new size.
+ */
+ }
+
+ /* Now handle shifting down the bits, if needed */
+ if (offset_bits > 0)
+ {
+ int carry_bits = BITS_PER_BITMAPWORD - offset_bits;
+ bitmapword prev_carry = 0;
+
+ if (new_nwords < old_nwords)
+ prev_carry = (a->words[new_nwords] << carry_bits);
+
+ for (int i = new_nwords - 1; i >= 0; i--)
+ {
+ bitmapword carry = (a->words[i] << carry_bits);
+
+ /* shift bits down and carry the bits from the previous word */
+ a->words[i] = (a->words[i] >> offset_bits) | prev_carry;
+ prev_carry = carry;
+ }
+ }
+ }
+
+#ifdef REALLOCATE_BITMAPSETS
+
+ /*
+ * There's no guarantee that the repalloc returned a new pointer, so copy
+ * and free unconditionally here.
+ */
+ a = bms_copy_and_free(a);
+#else
+
+ /*
+ * That was a fair bit of bit twiddling, so make sure we've still got a
+ * valid set with no leading zero words
+ */
+ Assert(bms_is_valid_set(a));
+#endif
+
+ return a;
+}
+
/*
* bms_add_range
* Add members in the range of 'lower' to 'upper' to the set.
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index ff0e875f2a2..86a8d993997 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -2057,16 +2057,10 @@ set_hash_references(PlannerInfo *root, Plan *plan, int rtoffset)
static Relids
offset_relid_set(Relids relids, int rtoffset)
{
- Relids result = NULL;
- int rtindex;
-
- /* If there's no offset to apply, we needn't recompute the value */
+ /* If there's no offset to apply, we needn't make a copy */
if (rtoffset == 0)
return relids;
- rtindex = -1;
- while ((rtindex = bms_next_member(relids, rtindex)) >= 0)
- result = bms_add_member(result, rtindex + rtoffset);
- return result;
+ return bms_offset_members(bms_copy(relids), rtoffset);
}
/*
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 95bf51606cc..072fd47db9e 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -3751,22 +3751,22 @@ has_notnull_forced_var(PlannerInfo *root, List *forced_null_vars,
if (bms_is_member(varno, right_state->nullable_rels))
continue;
- /*
- * Iterate over attributes and adjust the bitmap indexes by
- * FirstLowInvalidHeapAttributeNumber to get the actual attribute
- * numbers.
- */
- attno = -1;
- while ((attno = bms_next_member(attrs, attno)) >= 0)
- {
- AttrNumber real_attno = attno + FirstLowInvalidHeapAttributeNumber;
+ /* Find the lowest member to check if system columns are present */
+ attno = bms_next_member(attrs, -1);
- /* system columns cannot be NULL */
- if (real_attno < 0)
- return true;
+ /* We checked for an empty set above */
+ Assert(attno >= 0);
- forcednullattnums = bms_add_member(forcednullattnums, real_attno);
- }
+ /* system columns cannot be NULL */
+ if (attno + FirstLowInvalidHeapAttributeNumber < 0)
+ return true;
+
+ /*
+ * Adjust the bitmap indexes by FirstLowInvalidHeapAttributeNumber to
+ * get the actual attribute numbers.
+ */
+ forcednullattnums = bms_offset_members(bms_copy(attrs),
+ FirstLowInvalidHeapAttributeNumber);
rte = rt_fetch(varno, root->parse->rtable);
diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c
index 4bf4aa0d6d1..e1017ddf42c 100644
--- a/src/backend/rewrite/rewriteManip.c
+++ b/src/backend/rewrite/rewriteManip.c
@@ -527,13 +527,7 @@ OffsetVarNodes(Node *node, int offset, int sublevels_up)
static Relids
offset_relid_set(Relids relids, int offset)
{
- Relids result = NULL;
- int rtindex;
-
- rtindex = -1;
- while ((rtindex = bms_next_member(relids, rtindex)) >= 0)
- result = bms_add_member(result, rtindex + offset);
- return result;
+ return bms_offset_members(bms_copy(relids), offset);
}
/*
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 2b83355d26e..f2c0e2d35d9 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1673,8 +1673,7 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
*/
if (!leakproof)
{
- Bitmapset *clause_attnums = NULL;
- int attnum = -1;
+ Bitmapset *clause_attnums;
/*
* We have to check per-column privileges. *attnums has the attnums
@@ -1685,13 +1684,8 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
* while attnums within *attnums aren't. Convert *attnums to the
* offset style so we can combine the results.
*/
- while ((attnum = bms_next_member(*attnums, attnum)) >= 0)
- {
- clause_attnums =
- bms_add_member(clause_attnums,
- attnum - FirstLowInvalidHeapAttributeNumber);
- }
-
+ clause_attnums = bms_offset_members(bms_copy(*attnums),
+ 0 - FirstLowInvalidHeapAttributeNumber);
/* Now merge attnums from *exprs into clause_attnums */
if (*exprs != NIL)
pull_varattnos((Node *) *exprs, relid, &clause_attnums);
diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h
index 067ec72e99b..748fa2ad58b 100644
--- a/src/include/nodes/bitmapset.h
+++ b/src/include/nodes/bitmapset.h
@@ -123,6 +123,7 @@ extern Bitmapset *bms_add_member(Bitmapset *a, int x);
extern Bitmapset *bms_del_member(Bitmapset *a, int x);
extern Bitmapset *bms_add_members(Bitmapset *a, const Bitmapset *b);
extern Bitmapset *bms_replace_members(Bitmapset *a, const Bitmapset *b);
+extern Bitmapset *bms_offset_members(Bitmapset *a, int offset);
extern Bitmapset *bms_add_range(Bitmapset *a, int lower, int upper);
extern Bitmapset *bms_int_members(Bitmapset *a, const Bitmapset *b);
extern Bitmapset *bms_del_members(Bitmapset *a, const Bitmapset *b);
diff --git a/src/test/modules/test_bitmapset/expected/test_bitmapset.out b/src/test/modules/test_bitmapset/expected/test_bitmapset.out
index f75cb46b869..3a6f08427cd 100644
--- a/src/test/modules/test_bitmapset/expected/test_bitmapset.out
+++ b/src/test/modules/test_bitmapset/expected/test_bitmapset.out
@@ -760,6 +760,37 @@ SELECT test_bms_compare(NULL, NULL) AS result;
0
(1 row)
+-- bms_offset_members()
+-- Ensure overflow detection works
+SELECT test_bms_offset_members('(b 1)', 2147483647);
+ERROR: bitmapset overflow
+SELECT test_bms_offset_members('(b 2)', 2147483646);
+ERROR: bitmapset overflow
+-- Ensure members are all offset as expected
+SELECT test_bms_offset_members('(b 1 3 5)', 1);
+ test_bms_offset_members
+-------------------------
+ (b 2 4 6)
+(1 row)
+
+SELECT test_bms_offset_members('(b 1 3 5)', -1);
+ test_bms_offset_members
+-------------------------
+ (b 0 2 4)
+(1 row)
+
+SELECT test_bms_offset_members('(b 31 32 63 64)', 1);
+ test_bms_offset_members
+-------------------------
+ (b 32 33 64 65)
+(1 row)
+
+SELECT test_bms_offset_members('(b 31 32 63 64)', -1);
+ test_bms_offset_members
+-------------------------
+ (b 30 31 62 63)
+(1 row)
+
-- bms_add_range()
SELECT test_bms_add_range('(b)', -5, 10); -- error
ERROR: negative bitmapset member not allowed
@@ -1575,4 +1606,11 @@ SELECT test_random_operations(-1, 10000, 81920, 0) > 0 AS result;
t
(1 row)
+-- perform some random test on bms_offset_members()
+SELECT test_random_offset_operations(-1, 10000, 1024, 0) AS result;
+ result
+--------
+ 10000
+(1 row)
+
DROP EXTENSION test_bitmapset;
diff --git a/src/test/modules/test_bitmapset/sql/test_bitmapset.sql b/src/test/modules/test_bitmapset/sql/test_bitmapset.sql
index e75ab8f620a..5c5cfe3fc15 100644
--- a/src/test/modules/test_bitmapset/sql/test_bitmapset.sql
+++ b/src/test/modules/test_bitmapset/sql/test_bitmapset.sql
@@ -201,6 +201,16 @@ SELECT test_bms_compare('(b 5)', NULL) AS result;
SELECT test_bms_compare(NULL, '(b 5)') AS result;
SELECT test_bms_compare(NULL, NULL) AS result;
+-- bms_offset_members()
+-- Ensure overflow detection works
+SELECT test_bms_offset_members('(b 1)', 2147483647);
+SELECT test_bms_offset_members('(b 2)', 2147483646);
+-- Ensure members are all offset as expected
+SELECT test_bms_offset_members('(b 1 3 5)', 1);
+SELECT test_bms_offset_members('(b 1 3 5)', -1);
+SELECT test_bms_offset_members('(b 31 32 63 64)', 1);
+SELECT test_bms_offset_members('(b 31 32 63 64)', -1);
+
-- bms_add_range()
SELECT test_bms_add_range('(b)', -5, 10); -- error
SELECT test_bms_add_range('(b)', 5, 7) AS result;
@@ -403,4 +413,7 @@ SELECT test_bms_nonempty_difference('(b 1 2)', '(b 50 100)') AS result;
-- random operations
SELECT test_random_operations(-1, 10000, 81920, 0) > 0 AS result;
+-- perform some random test on bms_offset_members()
+SELECT test_random_offset_operations(-1, 10000, 1024, 0) AS result;
+
DROP EXTENSION test_bitmapset;
diff --git a/src/test/modules/test_bitmapset/test_bitmapset--1.0.sql b/src/test/modules/test_bitmapset/test_bitmapset--1.0.sql
index 227ecb5aa3b..ea12ec40057 100644
--- a/src/test/modules/test_bitmapset/test_bitmapset--1.0.sql
+++ b/src/test/modules/test_bitmapset/test_bitmapset--1.0.sql
@@ -120,6 +120,10 @@ CREATE FUNCTION test_bms_replace_members(text, text)
RETURNS text
AS 'MODULE_PATHNAME' LANGUAGE C;
+CREATE FUNCTION test_bms_offset_members(text, integer)
+RETURNS text
+AS 'MODULE_PATHNAME' LANGUAGE C;
+
CREATE FUNCTION test_bms_join(text, text)
RETURNS text
AS 'MODULE_PATHNAME' LANGUAGE C;
@@ -137,4 +141,8 @@ CREATE FUNCTION test_random_operations(integer, integer, integer, integer)
RETURNS integer STRICT
AS 'MODULE_PATHNAME' LANGUAGE C;
+CREATE FUNCTION test_random_offset_operations(integer, integer, integer, integer)
+RETURNS integer STRICT
+AS 'MODULE_PATHNAME' LANGUAGE C;
+
COMMENT ON EXTENSION test_bitmapset IS 'Test code for Bitmapset';
diff --git a/src/test/modules/test_bitmapset/test_bitmapset.c b/src/test/modules/test_bitmapset/test_bitmapset.c
index 272bed390a6..1fbf9ffc200 100644
--- a/src/test/modules/test_bitmapset/test_bitmapset.c
+++ b/src/test/modules/test_bitmapset/test_bitmapset.c
@@ -59,12 +59,14 @@ PG_FUNCTION_INFO_V1(test_bms_add_members);
PG_FUNCTION_INFO_V1(test_bms_int_members);
PG_FUNCTION_INFO_V1(test_bms_del_members);
PG_FUNCTION_INFO_V1(test_bms_replace_members);
+PG_FUNCTION_INFO_V1(test_bms_offset_members);
PG_FUNCTION_INFO_V1(test_bms_join);
PG_FUNCTION_INFO_V1(test_bitmap_hash);
PG_FUNCTION_INFO_V1(test_bitmap_match);
/* Test utility functions */
PG_FUNCTION_INFO_V1(test_random_operations);
+PG_FUNCTION_INFO_V1(test_random_offset_operations);
/* Convenient macros to test results */
#define EXPECT_TRUE(expr) \
@@ -530,6 +532,18 @@ test_bms_replace_members(PG_FUNCTION_ARGS)
PG_RETURN_BITMAPSET_AS_TEXT(bms1);
}
+Datum
+test_bms_offset_members(PG_FUNCTION_ARGS)
+{
+ Bitmapset *bms = PG_ARG_GETBITMAPSET(0);
+ int offset = PG_GETARG_INT32(1);
+
+ /* left input gets recycled */
+ bms = bms_offset_members(bms, offset);
+
+ PG_RETURN_BITMAPSET_AS_TEXT(bms);
+}
+
Datum
test_bms_join(PG_FUNCTION_ARGS)
{
@@ -580,7 +594,7 @@ test_bitmap_match(PG_FUNCTION_ARGS)
}
/*
- * Contrary to all the other functions which are one-one mappings with the
+ * Contrary to most of the other functions which are one-one mappings with the
* equivalent C functions, this stresses Bitmapsets in a random fashion for
* various operations.
*
@@ -742,3 +756,74 @@ test_random_operations(PG_FUNCTION_ARGS)
PG_RETURN_INT32(total_ops);
}
+
+/*
+ * Random testing for bms_offset_members(). Generates a random set and then
+ * picks a number to offset the members by. We then create another set, which
+ * is built by looping over the members of the random set and performing
+ * bms_add_member and adding on the offset to create a known good set to
+ * compare the result of bms_offset_members() to.
+ *
+ * Arguments:
+ * arg1: optional random seed, or < 0 to use a random seed.
+ * arg2: the number of operations to perform.
+ * arg3: the maximum bitmapset member number to use in the random set.
+ * arg4: the minimum bitmapset member number to use in the random set.
+ */
+Datum
+test_random_offset_operations(PG_FUNCTION_ARGS)
+{
+ Bitmapset *bms1;
+ Bitmapset *bms2;
+ pg_prng_state state;
+ uint64 seed = GetCurrentTimestamp();
+ int num_ops;
+ int max_range;
+ int min_value;
+ int member;
+
+ if (PG_GETARG_INT32(0) > 0)
+ seed = PG_GETARG_INT32(0);
+
+ num_ops = PG_GETARG_INT32(1);
+ max_range = PG_GETARG_INT32(2);
+ min_value = PG_GETARG_INT32(3);
+
+ pg_prng_seed(&state, seed);
+
+ for (int op = 0; op < num_ops; op++)
+ {
+ int offset;
+
+ bms1 = NULL;
+ for (int i = 0; i < 10; i++)
+ {
+ member = pg_prng_uint32(&state) % max_range + min_value;
+
+ bms1 = bms_add_member(bms1, member);
+ }
+
+ offset = (pg_prng_uint32(&state) % max_range) - (pg_prng_uint32(&state) % max_range);
+
+ /* create a known-good set the old fashioned way */
+ bms2 = NULL;
+ member = -1;
+ while ((member = bms_next_member(bms1, member)) >= 0)
+ {
+ if (member + offset >= 0)
+ bms2 = bms_add_member(bms2, member + offset);
+ }
+
+ /* do the offsetting */
+ bms1 = bms_offset_members(bms1, offset);
+
+ /* check against the known-good set */
+ if (!bms_equal(bms1, bms2))
+ elog(ERROR, "bms_offset_members failed with offset %d seed " INT64_FORMAT, offset, seed);
+
+ bms_free(bms1);
+ bms_free(bms2);
+ }
+
+ PG_RETURN_INT32(num_ops);
+}
--
2.51.0
view thread (15+ messages) latest in thread
reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Reply to all the recipients using the --to and --cc options:
reply via email
To: [email protected]
Cc: [email protected], [email protected], [email protected]
Subject: Re: Add bms_offset_members() function for bitshifting Bitmapsets
In-Reply-To: <CAApHDvoALVPgVSCuGdAfNUbxx_NacnAd0nuj8Hi=_-61AEqzaQ@mail.gmail.com>
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox