From d86b371499db011a36583d20963df68b09219190 Mon Sep 17 00:00:00 2001 From: Alexandre Felipe Date: Fri, 30 Jan 2026 14:27:18 +0000 Subject: [PATCH 2/4] [MERGE-SCAN]: Access method --- .gitignore | 8 + src/backend/access/nbtree/Makefile | 1 + src/backend/access/nbtree/meson.build | 1 + src/backend/access/nbtree/nbtmergescan.c | 457 ++++++++++++++++++ src/include/access/nbtree.h | 64 +++ src/test/modules/meson.build | 1 + src/test/modules/test_btree_merge/Makefile | 24 + .../expected/test_btree_merge.out | 243 ++++++++++ src/test/modules/test_btree_merge/meson.build | 33 ++ .../test_btree_merge/sql/test_btree_merge.sql | 207 ++++++++ .../test_btree_merge--1.0.sql | 43 ++ .../test_btree_merge/test_btree_merge.c | 389 +++++++++++++++ .../test_btree_merge/test_btree_merge.control | 5 + 13 files changed, 1476 insertions(+) create mode 100644 src/backend/access/nbtree/nbtmergescan.c create mode 100644 src/test/modules/test_btree_merge/Makefile create mode 100644 src/test/modules/test_btree_merge/expected/test_btree_merge.out create mode 100644 src/test/modules/test_btree_merge/meson.build create mode 100644 src/test/modules/test_btree_merge/sql/test_btree_merge.sql create mode 100644 src/test/modules/test_btree_merge/test_btree_merge--1.0.sql create mode 100644 src/test/modules/test_btree_merge/test_btree_merge.c create mode 100644 src/test/modules/test_btree_merge/test_btree_merge.control diff --git a/.gitignore b/.gitignore index 4e911395fe3..ac1f95d9cf0 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,11 @@ lib*.pc /Release/ /tmp_install/ /portlock/ + +# hidden files (e.g. .dbdata, .install, good practice to test locally in isolation) +.* + +# Test output +**/regression.diffs +**/regression.out +**/results/ diff --git a/src/backend/access/nbtree/Makefile b/src/backend/access/nbtree/Makefile index 0daf640af96..72053cefdaa 100644 --- a/src/backend/access/nbtree/Makefile +++ b/src/backend/access/nbtree/Makefile @@ -16,6 +16,7 @@ OBJS = \ nbtcompare.o \ nbtdedup.o \ nbtinsert.o \ + nbtmergescan.o \ nbtpage.o \ nbtpreprocesskeys.o \ nbtreadpage.o \ diff --git a/src/backend/access/nbtree/meson.build b/src/backend/access/nbtree/meson.build index 812f067e710..1016fea62d5 100644 --- a/src/backend/access/nbtree/meson.build +++ b/src/backend/access/nbtree/meson.build @@ -4,6 +4,7 @@ backend_sources += files( 'nbtcompare.c', 'nbtdedup.c', 'nbtinsert.c', + 'nbtmergescan.c', 'nbtpage.c', 'nbtpreprocesskeys.c', 'nbtreadpage.c', diff --git a/src/backend/access/nbtree/nbtmergescan.c b/src/backend/access/nbtree/nbtmergescan.c new file mode 100644 index 00000000000..70828dc73d3 --- /dev/null +++ b/src/backend/access/nbtree/nbtmergescan.c @@ -0,0 +1,457 @@ +/*------------------------------------------------------------------------- + * + * nbtmergescan.c + * B-Tree merge scan for efficient evaluation of IN-list queries + * + * This module implements a K-way merge scan for B-tree indexes, optimized + * for queries of the form: + * WHERE prefix IN (v1, v2, ..., vK) AND suffix >= b ORDER BY suffix LIMIT N + * + * The algorithm maintains a min-heap of cursors, one per prefix value. + * Each cursor tracks its position within the index for that prefix. + * Tuples are returned in suffix order by repeatedly extracting the + * minimum from the heap. + * + * Target behavior: Access at most N + K - 1 index tuples for LIMIT N. + * + * Copyright (c) 2026, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/access/nbtree/nbtmergescan.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/nbtree.h" +#include "access/relscan.h" +#include "lib/pairingheap.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "utils/datum.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" + +/* Forward declarations of static functions */ +static int bt_merge_heap_cmp(const pairingheap_node *a, + const pairingheap_node *b, + void *arg); +static bool bt_merge_cursor_init(BTMergeScanState *state, + IndexScanDesc scan, + BTMergeCursor *cursor, + Datum prefix_value, + bool prefix_isnull); +static bool bt_merge_cursor_advance(BTMergeScanState *state, + IndexScanDesc scan, + BTMergeCursor *cursor); +static Datum bt_merge_extract_sortkey(BTMergeScanState *state, + IndexScanDesc scan, + BTMergeCursor *cursor, + bool *isnull); + + +/* + * bt_merge_heap_cmp + * Compare two cursors by their current sort key (suffix value). + * + * When sort keys are equal, uses prefix value as tiebreaker for + * deterministic ordering (ORDER BY suffix, prefix). + * + * Returns positive if a > b (pairingheap is a max-heap, we want min-heap + * behavior so we invert the comparison). + */ +static int +bt_merge_heap_cmp(const pairingheap_node *a, + const pairingheap_node *b, + void *arg) +{ + BTMergeScanState *state = (BTMergeScanState *) arg; + BTMergeCursor *cursor_a = pairingheap_container(BTMergeCursor, ph_node, + (pairingheap_node *) a); + BTMergeCursor *cursor_b = pairingheap_container(BTMergeCursor, ph_node, + (pairingheap_node *) b); + Datum key_a = cursor_a->sort_key; + Datum key_b = cursor_b->sort_key; + bool null_a = cursor_a->sort_key_isnull; + bool null_b = cursor_b->sort_key_isnull; + int32 cmp; + + /* Handle NULLs - NULLs sort last (NULLS LAST default for ASC) */ + if (null_a && null_b) + return 0; + if (null_a) + return -1; /* a is NULL, comes after b */ + if (null_b) + return 1; /* b is NULL, comes after a */ + + /* Compare using the suffix column's comparison function */ + cmp = DatumGetInt32(FunctionCall2Coll(&state->suffix_cmp, + state->suffix_collation, + key_a, key_b)); + + /* + * Use prefix value as tiebreaker for deterministic ordering. + * This ensures ORDER BY suffix, prefix behavior. + */ + if (cmp == 0) + { + /* Compare prefix values (assumes pass-by-value int4 for now) */ + int32 prefix_a = DatumGetInt32(cursor_a->prefix_value); + int32 prefix_b = DatumGetInt32(cursor_b->prefix_value); + + if (prefix_a < prefix_b) + cmp = -1; + else if (prefix_a > prefix_b) + cmp = 1; + } + + /* Negate for min-heap behavior */ + return -cmp; +} + + +/* + * bt_merge_init + * Initialize a merge scan state. + * + * Creates the merge state with one cursor per prefix value. + * The cursors will be positioned at their first matching tuples + * when bt_merge_getnext is first called. + */ +BTMergeScanState * +bt_merge_init(IndexScanDesc scan, + Datum *prefix_values, + bool *prefix_nulls, + int num_prefixes, + int prefix_attno, + int suffix_attno, + Oid suffix_cmp_oid, + Oid suffix_collation) +{ + BTMergeScanState *state; + MemoryContext merge_context; + MemoryContext old_context; + int i; + + /* Create memory context for merge scan allocations */ + merge_context = AllocSetContextCreate(CurrentMemoryContext, + "BTMergeScan", + ALLOCSET_DEFAULT_SIZES); + old_context = MemoryContextSwitchTo(merge_context); + + /* Allocate main state structure */ + state = palloc0(sizeof(BTMergeScanState)); + state->merge_context = merge_context; + state->num_cursors = num_prefixes; + state->active_cursors = 0; + state->prefix_attno = prefix_attno; + state->suffix_attno = suffix_attno; + state->suffix_collation = suffix_collation; + state->direction = ForwardScanDirection; + state->initialized = false; + state->tuples_accessed = 0; + + /* Set up suffix comparison function */ + fmgr_info(suffix_cmp_oid, &state->suffix_cmp); + + /* Allocate cursor array */ + state->cursors = palloc0(num_prefixes * sizeof(BTMergeCursor)); + + /* Initialize cursor metadata (not positioned yet) */ + for (i = 0; i < num_prefixes; i++) + { + BTMergeCursor *cursor = &state->cursors[i]; + + cursor->cursor_id = i; + cursor->prefix_value = datumCopy(prefix_values[i], true, sizeof(Datum)); + cursor->prefix_isnull = prefix_nulls[i]; + cursor->exhausted = prefix_nulls[i]; /* NULL prefix = exhausted */ + cursor->sort_key_isnull = true; + BTScanPosInvalidate(cursor->pos); + cursor->tuples = NULL; + } + + /* Initialize the merge heap */ + state->merge_heap = pairingheap_allocate(bt_merge_heap_cmp, state); + + MemoryContextSwitchTo(old_context); + + return state; +} + + +/* + * bt_merge_getnext + * Get the next tuple from the merge scan. + * + * Returns true if a tuple was found, false if scan is exhausted. + * The tuple's TID is stored in scan->xs_heaptid. + */ +bool +bt_merge_getnext(IndexScanDesc scan, ScanDirection dir) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + BTMergeScanState *state = so->mergeState; + BTMergeCursor *cursor; + pairingheap_node *node; + int i; + + if (state == NULL) + return false; + + /* Initialize cursors on first call */ + if (!state->initialized) + { + state->initialized = true; + state->direction = dir; + + for (i = 0; i < state->num_cursors; i++) + { + BTMergeCursor *c = &state->cursors[i]; + + if (!c->exhausted && + bt_merge_cursor_init(state, scan, c, + c->prefix_value, c->prefix_isnull)) + { + /* Cursor has at least one tuple, add to heap */ + pairingheap_add(state->merge_heap, &c->ph_node); + state->active_cursors++; + } + } + } + + /* Get the cursor with the smallest suffix value */ + if (pairingheap_is_empty(state->merge_heap)) + return false; + + node = pairingheap_remove_first(state->merge_heap); + cursor = pairingheap_container(BTMergeCursor, ph_node, node); + + /* Set up the heap TID from the current cursor position */ + Assert(BTScanPosIsValid(cursor->pos)); + scan->xs_heaptid = cursor->pos.items[cursor->pos.itemIndex].heapTid; + + /* Advance cursor to next tuple */ + if (bt_merge_cursor_advance(state, scan, cursor)) + { + /* Cursor still has tuples, re-add to heap */ + pairingheap_add(state->merge_heap, &cursor->ph_node); + } + else + { + /* Cursor exhausted */ + state->active_cursors--; + } + + return true; +} + + +/* + * bt_merge_end + * Clean up merge scan state. + */ +void +bt_merge_end(BTMergeScanState *state) +{ + if (state == NULL) + return; + + /* Free the memory context, which frees all allocations */ + MemoryContextDelete(state->merge_context); +} + + +/* + * bt_merge_cursor_init + * Initialize a cursor and position it at the first matching tuple. + * + * Returns true if the cursor found at least one matching tuple. + */ +static bool +bt_merge_cursor_init(BTMergeScanState *state, + IndexScanDesc scan, + BTMergeCursor *cursor, + Datum prefix_value, + bool prefix_isnull) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + bool found; + + if (prefix_isnull) + { + cursor->exhausted = true; + return false; + } + + /* + * Modify the scan key to use this cursor's prefix value. + * We reuse the scan's existing key infrastructure. + */ + for (int i = 0; i < so->numberOfKeys; i++) + { + if (so->keyData[i].sk_attno == state->prefix_attno) + { + so->keyData[i].sk_argument = prefix_value; + so->keyData[i].sk_flags &= ~(SK_SEARCHARRAY); + break; + } + } + + /* Invalidate current position to force _bt_first */ + BTScanPosInvalidate(so->currPos); + + /* Disable array key handling for this cursor's scan */ + so->numArrayKeys = 0; + + /* Position at first matching tuple */ + found = _bt_first(scan, state->direction); + + if (found) + { + /* Copy position to cursor */ + memcpy(&cursor->pos, &so->currPos, sizeof(BTScanPosData)); + + /* Extract the sort key for heap ordering */ + cursor->sort_key = bt_merge_extract_sortkey(state, scan, cursor, + &cursor->sort_key_isnull); + cursor->exhausted = false; + + /* Count this as a tuple access */ + state->tuples_accessed++; + + /* Invalidate main scan position */ + BTScanPosInvalidate(so->currPos); + } + else + { + cursor->exhausted = true; + } + + return found; +} + + +/* + * bt_merge_cursor_advance + * Advance a cursor to its next tuple. + * + * Returns true if the cursor now points to a valid tuple, false if exhausted. + */ +static bool +bt_merge_cursor_advance(BTMergeScanState *state, + IndexScanDesc scan, + BTMergeCursor *cursor) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + bool found = false; + + if (cursor->exhausted) + return false; + + /* Try to move to next tuple within current page's items array */ + if (state->direction == ForwardScanDirection) + { + if (cursor->pos.itemIndex < cursor->pos.lastItem) + { + cursor->pos.itemIndex++; + found = true; + } + } + else + { + if (cursor->pos.itemIndex > cursor->pos.firstItem) + { + cursor->pos.itemIndex--; + found = true; + } + } + + if (!found) + { + /* + * Current page exhausted. Use _bt_next to get the next page. + * We swap our cursor's position into the scan's currPos, + * call _bt_next, then swap back. + */ + BTScanPosData save_pos; + + memcpy(&save_pos, &so->currPos, sizeof(BTScanPosData)); + memcpy(&so->currPos, &cursor->pos, sizeof(BTScanPosData)); + + found = _bt_next(scan, state->direction); + + if (found) + memcpy(&cursor->pos, &so->currPos, sizeof(BTScanPosData)); + + memcpy(&so->currPos, &save_pos, sizeof(BTScanPosData)); + } + + if (found) + { + /* Extract new sort key */ + cursor->sort_key = bt_merge_extract_sortkey(state, scan, cursor, + &cursor->sort_key_isnull); + state->tuples_accessed++; + } + else + { + cursor->exhausted = true; + } + + return found; +} + + +/* + * bt_merge_extract_sortkey + * Extract the sort key (suffix column value) from the current tuple. + */ +static Datum +bt_merge_extract_sortkey(BTMergeScanState *state, + IndexScanDesc scan, + BTMergeCursor *cursor, + bool *isnull) +{ + Relation rel = scan->indexRelation; + Buffer buf; + Page page; + OffsetNumber offnum; + ItemId itemid; + IndexTuple itup; + TupleDesc tupdesc; + Datum result; + + if (cursor->pos.currPage == InvalidBlockNumber) + { + *isnull = true; + return (Datum) 0; + } + + /* Read the page */ + buf = ReadBuffer(rel, cursor->pos.currPage); + LockBuffer(buf, BT_READ); + page = BufferGetPage(buf); + + offnum = cursor->pos.items[cursor->pos.itemIndex].indexOffset; + itemid = PageGetItemId(page, offnum); + itup = (IndexTuple) PageGetItem(page, itemid); + tupdesc = RelationGetDescr(rel); + + /* Extract the suffix column value */ + result = index_getattr(itup, state->suffix_attno, tupdesc, isnull); + + /* Copy pass-by-reference values before releasing buffer */ + if (!*isnull) + { + Form_pg_attribute attr = TupleDescAttr(tupdesc, state->suffix_attno - 1); + + if (!attr->attbyval) + result = datumCopy(result, attr->attbyval, attr->attlen); + } + + UnlockReleaseBuffer(buf); + + return result; +} diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 77224859685..0d4e7440760 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -20,6 +20,7 @@ #include "catalog/pg_am_d.h" #include "catalog/pg_class.h" #include "catalog/pg_index.h" +#include "lib/pairingheap.h" #include "lib/stringinfo.h" #include "storage/bufmgr.h" #include "storage/dsm.h" @@ -1050,6 +1051,49 @@ typedef struct BTArrayKeyInfo ScanKey high_compare; /* array's < or <= upper bound */ } BTArrayKeyInfo; +/* + * BTMergeCursor - tracks scan state for one prefix value in merge scan + * + * Each cursor maintains its own position within the index for a specific + * prefix value. Cursors are organized in a min-heap ordered by their + * current suffix key value for efficient K-way merge. + */ +typedef struct BTMergeCursor +{ + pairingheap_node ph_node; /* pairing heap node for merge */ + int cursor_id; /* index in merge state's cursors array */ + Datum prefix_value; /* the prefix value for this sub-scan */ + bool prefix_isnull; /* is prefix value NULL? */ + Datum sort_key; /* current tuple's sort key (suffix) */ + bool sort_key_isnull;/* is sort key NULL? */ + bool exhausted; /* no more tuples for this prefix */ + BTScanPosData pos; /* current position in index */ + char *tuples; /* tuple storage workspace (BLCKSZ) */ +} BTMergeCursor; + +/* + * BTMergeScanState - state for K-way merge scan + * + * This structure manages multiple cursors for a merge scan, allowing + * lazy evaluation of queries like: + * WHERE prefix IN (v1, v2, ..., vK) AND suffix >= b ORDER BY suffix LIMIT N + */ +typedef struct BTMergeScanState +{ + int num_cursors; /* number of prefix values (K) */ + int active_cursors; /* cursors not yet exhausted */ + BTMergeCursor *cursors; /* array of cursors */ + pairingheap *merge_heap; /* min-heap ordered by sort_key */ + int prefix_attno; /* attribute number of prefix column (1-based) */ + int suffix_attno; /* attribute number of suffix column (1-based) */ + FmgrInfo suffix_cmp; /* comparison function for suffix */ + Oid suffix_collation; /* collation for suffix comparison */ + ScanDirection direction; /* scan direction */ + bool initialized; /* have cursors been initialized? */ + MemoryContext merge_context;/* memory context for allocations */ + int64 tuples_accessed;/* count of index tuples accessed */ +} BTMergeScanState; + typedef struct BTScanOpaqueData { /* these fields are set by _bt_preprocess_keys(): */ @@ -1089,6 +1133,12 @@ typedef struct BTScanOpaqueData */ int markItemIndex; /* itemIndex, or -1 if not valid */ + /* + * Merge scan state, if using merge scan optimization. + * NULL if not using merge scan. + */ + BTMergeScanState *mergeState; + /* keep these last in struct for efficiency */ BTScanPosData currPos; /* current position data */ BTScanPosData markPos; /* marked position, if any */ @@ -1334,4 +1384,18 @@ extern IndexBuildResult *btbuild(Relation heap, Relation index, struct IndexInfo *indexInfo); extern void _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc); +/* + * prototypes for functions in nbtmergescan.c + */ +extern BTMergeScanState *bt_merge_init(IndexScanDesc scan, + Datum *prefix_values, + bool *prefix_nulls, + int num_prefixes, + int prefix_attno, + int suffix_attno, + Oid suffix_cmp_oid, + Oid suffix_collation); +extern bool bt_merge_getnext(IndexScanDesc scan, ScanDirection dir); +extern void bt_merge_end(BTMergeScanState *state); + #endif /* NBTREE_H */ diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build index 2634a519935..b7b802bfdde 100644 --- a/src/test/modules/meson.build +++ b/src/test/modules/meson.build @@ -18,6 +18,7 @@ subdir('ssl_passphrase_callback') subdir('test_aio') subdir('test_binaryheap') subdir('test_bitmapset') +subdir('test_btree_merge') subdir('test_bloomfilter') subdir('test_cloexec') subdir('test_copy_callbacks') diff --git a/src/test/modules/test_btree_merge/Makefile b/src/test/modules/test_btree_merge/Makefile new file mode 100644 index 00000000000..540416a2c91 --- /dev/null +++ b/src/test/modules/test_btree_merge/Makefile @@ -0,0 +1,24 @@ +# src/test/modules/test_btree_merge/Makefile + +MODULE_big = test_btree_merge +OBJS = \ + $(WIN32RES) \ + test_btree_merge.o + +PGFILEDESC = "test_btree_merge - test code for btree merge scan" + +EXTENSION = test_btree_merge +DATA = test_btree_merge--1.0.sql + +REGRESS = test_btree_merge + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_btree_merge +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_btree_merge/expected/test_btree_merge.out b/src/test/modules/test_btree_merge/expected/test_btree_merge.out new file mode 100644 index 00000000000..baf4d7937e0 --- /dev/null +++ b/src/test/modules/test_btree_merge/expected/test_btree_merge.out @@ -0,0 +1,243 @@ +-- Unit tests for B-tree merge scan implementation +-- Tests the core merge scan algorithm directly, bypassing the planner +CREATE EXTENSION test_btree_merge; +-- ============================================================================ +-- Setup: Create test tables with known data distributions +-- ============================================================================ +-- Test table with integer prefix and suffix +CREATE TABLE merge_test_int ( + prefix_col int4, + suffix_col int4 +); +-- Insert data: 10 prefix values, 100 suffix values each = 1000 rows +INSERT INTO merge_test_int +SELECT p, s +FROM generate_series(1, 10) AS p, + generate_series(1, 100) AS s; +CREATE INDEX merge_test_int_idx ON merge_test_int (prefix_col, suffix_col); +ANALYZE merge_test_int; +-- Test table with integer prefix and timestamp suffix +CREATE TABLE merge_test_ts ( + user_id int4, + event_time timestamp +); +-- Insert data: 5 users, 100 events each +INSERT INTO merge_test_ts +SELECT u, '2026-01-01 00:00:00'::timestamp + (e || ' minutes')::interval +FROM generate_series(1, 5) AS u, + generate_series(1, 100) AS e; +CREATE INDEX merge_test_ts_idx ON merge_test_ts (user_id, event_time); +ANALYZE merge_test_ts; +-- ============================================================================ +-- Test 1: Basic integer merge scan +-- Query: WHERE prefix IN (1,2,3) AND suffix >= 50 LIMIT 5 +-- K = 3 prefix values, LIMIT = 5 +-- Expected tuples accessed: 5 + 3 - 1 = 7 +-- ============================================================================ +SELECT 'Test 1: Basic integer merge scan' AS test_name; + test_name +---------------------------------- + Test 1: Basic integer merge scan +(1 row) + +SELECT * FROM test_btree_merge_scan_int( + 'merge_test_int', + 'merge_test_int_idx', + ARRAY[1, 2, 3], + 50, + 5 +); + tuples_returned | tuples_accessed | maximum_required_fetches +-----------------+-----------------+-------------------------- + 5 | 8 | 7 +(1 row) + +-- ============================================================================ +-- Test 2: More prefix values +-- Query: WHERE prefix IN (1,2,3,4,5) AND suffix >= 80 LIMIT 3 +-- K = 5 prefix values, LIMIT = 3 +-- Expected tuples accessed: 3 + 5 - 1 = 7 +-- ============================================================================ +SELECT 'Test 2: More prefix values' AS test_name; + test_name +---------------------------- + Test 2: More prefix values +(1 row) + +SELECT * FROM test_btree_merge_scan_int( + 'merge_test_int', + 'merge_test_int_idx', + ARRAY[1, 2, 3, 4, 5], + 80, + 3 +); + tuples_returned | tuples_accessed | maximum_required_fetches +-----------------+-----------------+-------------------------- + 3 | 8 | 7 +(1 row) + +-- ============================================================================ +-- Test 3: Single prefix value (degenerates to regular scan) +-- K = 1, LIMIT = 5 +-- Expected tuples accessed: 5 + 1 - 1 = 5 +-- ============================================================================ +SELECT 'Test 3: Single prefix value' AS test_name; + test_name +----------------------------- + Test 3: Single prefix value +(1 row) + +SELECT * FROM test_btree_merge_scan_int( + 'merge_test_int', + 'merge_test_int_idx', + ARRAY[1], + 50, + 5 +); + tuples_returned | tuples_accessed | maximum_required_fetches +-----------------+-----------------+-------------------------- + 5 | 6 | 5 +(1 row) + +-- ============================================================================ +-- Test 4: Large LIMIT (more than matching rows) +-- K = 3, prefix values that have 51 rows each (suffix >= 50) +-- LIMIT = 200 but only 153 rows exist +-- ============================================================================ +SELECT 'Test 4: Large LIMIT' AS test_name; + test_name +--------------------- + Test 4: Large LIMIT +(1 row) + +SELECT * FROM test_btree_merge_scan_int( + 'merge_test_int', + 'merge_test_int_idx', + ARRAY[1, 2, 3], + 50, + 200 +); + tuples_returned | tuples_accessed | maximum_required_fetches +-----------------+-----------------+-------------------------- + 153 | 153 | 153 +(1 row) + +-- ============================================================================ +-- Test 5: Non-contiguous prefix values +-- Query: WHERE prefix IN (2,5,8) AND suffix >= 50 LIMIT 5 +-- Tests that merge scan works with gaps in prefix values +-- K = 3 prefix values (non-adjacent), LIMIT = 5 +-- ============================================================================ +SELECT 'Test 5: Non-contiguous prefix values' AS test_name; + test_name +-------------------------------------- + Test 5: Non-contiguous prefix values +(1 row) + +SELECT * FROM test_btree_merge_scan_int( + 'merge_test_int', + 'merge_test_int_idx', + ARRAY[2, 5, 8], + 50, + 5 +); + tuples_returned | tuples_accessed | maximum_required_fetches +-----------------+-----------------+-------------------------- + 5 | 8 | 7 +(1 row) + +-- ============================================================================ +-- Test 6: Timestamp suffix column +-- Query: WHERE user_id IN (1,2,3) AND event_time >= '2026-01-01 01:00:00' LIMIT 5 +-- K = 3, LIMIT = 5 +-- Expected tuples accessed: 5 + 3 - 1 = 7 +-- ============================================================================ +SELECT 'Test 6: Timestamp suffix' AS test_name; + test_name +-------------------------- + Test 6: Timestamp suffix +(1 row) + +SELECT * FROM test_btree_merge_scan_ts( + 'merge_test_ts', + 'merge_test_ts_idx', + ARRAY[1, 2, 3], + '2026-01-01 01:00:00'::timestamp, + 5 +); + tuples_returned | tuples_accessed | maximum_required_fetches +-----------------+-----------------+-------------------------- + 5 | 8 | 7 +(1 row) + +-- ============================================================================ +-- Test 7: All users with timestamp +-- K = 5, LIMIT = 10 +-- Expected tuples accessed: 10 + 5 - 1 = 14 +-- ============================================================================ +SELECT 'Test 7: All users timestamp' AS test_name; + test_name +----------------------------- + Test 7: All users timestamp +(1 row) + +SELECT * FROM test_btree_merge_scan_ts( + 'merge_test_ts', + 'merge_test_ts_idx', + ARRAY[1, 2, 3, 4, 5], + '2026-01-01 00:30:00'::timestamp, + 10 +); + tuples_returned | tuples_accessed | maximum_required_fetches +-----------------+-----------------+-------------------------- + 10 | 15 | 14 +(1 row) + +-- ============================================================================ +-- Test 8: Correctness verification +-- Verify merge scan returns rows in exact ORDER BY suffix_col, prefix_col order +-- Using WITH ORDINALITY to compare row positions +-- ============================================================================ +SELECT 'Test 8: Correctness verification' AS test_name; + test_name +---------------------------------- + Test 8: Correctness verification +(1 row) + +-- Compare merge scan vs regular query with row positions (should be empty) +WITH merge_result AS ( + SELECT row_number() OVER () AS rn, prefix_col, suffix_col + FROM test_btree_merge_fetch_int( + 'merge_test_int', + 'merge_test_int_idx', + ARRAY[1, 2, 3], + 90, + 10 + ) +), +regular_result AS ( + SELECT row_number() OVER () AS rn, prefix_col, suffix_col + FROM ( + SELECT prefix_col, suffix_col + FROM merge_test_int + WHERE prefix_col IN (1, 2, 3) AND suffix_col >= 90 + ORDER BY suffix_col, prefix_col + LIMIT 10 + ) t +) +SELECT 'MISMATCH' AS status, m.rn, m.prefix_col, m.suffix_col, + r.prefix_col AS expected_prefix, r.suffix_col AS expected_suffix +FROM merge_result m +FULL OUTER JOIN regular_result r ON m.rn = r.rn +WHERE m.prefix_col IS DISTINCT FROM r.prefix_col + OR m.suffix_col IS DISTINCT FROM r.suffix_col; + status | rn | prefix_col | suffix_col | expected_prefix | expected_suffix +--------+----+------------+------------+-----------------+----------------- +(0 rows) + +-- ============================================================================ +-- Cleanup +-- ============================================================================ +DROP TABLE merge_test_int; +DROP TABLE merge_test_ts; +DROP EXTENSION test_btree_merge; diff --git a/src/test/modules/test_btree_merge/meson.build b/src/test/modules/test_btree_merge/meson.build new file mode 100644 index 00000000000..665d6cf443e --- /dev/null +++ b/src/test/modules/test_btree_merge/meson.build @@ -0,0 +1,33 @@ +# Copyright (c) 2026, PostgreSQL Global Development Group + +test_btree_merge_sources = files( + 'test_btree_merge.c', +) + +if host_system == 'windows' + test_btree_merge_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'test_btree_merge', + '--FILEDESC', 'test_btree_merge - test code for btree merge scan',]) +endif + +test_btree_merge = shared_module('test_btree_merge', + test_btree_merge_sources, + kwargs: pg_test_mod_args, +) +test_install_libs += test_btree_merge + +test_install_data += files( + 'test_btree_merge.control', + 'test_btree_merge--1.0.sql', +) + +tests += { + 'name': 'test_btree_merge', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'test_btree_merge', + ], + }, +} diff --git a/src/test/modules/test_btree_merge/sql/test_btree_merge.sql b/src/test/modules/test_btree_merge/sql/test_btree_merge.sql new file mode 100644 index 00000000000..5828b343b34 --- /dev/null +++ b/src/test/modules/test_btree_merge/sql/test_btree_merge.sql @@ -0,0 +1,207 @@ +-- Unit tests for B-tree merge scan implementation +-- Tests the core merge scan algorithm directly, bypassing the planner + +CREATE EXTENSION test_btree_merge; + +-- ============================================================================ +-- Setup: Create test tables with known data distributions +-- ============================================================================ + +-- Test table with integer prefix and suffix +CREATE TABLE merge_test_int ( + prefix_col int4, + suffix_col int4 +); + +-- Insert data: 10 prefix values, 100 suffix values each = 1000 rows +INSERT INTO merge_test_int +SELECT p, s +FROM generate_series(1, 10) AS p, + generate_series(1, 100) AS s; + +CREATE INDEX merge_test_int_idx ON merge_test_int (prefix_col, suffix_col); +ANALYZE merge_test_int; + +-- Test table with integer prefix and timestamp suffix +CREATE TABLE merge_test_ts ( + user_id int4, + event_time timestamp +); + +-- Insert data: 5 users, 100 events each +INSERT INTO merge_test_ts +SELECT u, '2026-01-01 00:00:00'::timestamp + (e || ' minutes')::interval +FROM generate_series(1, 5) AS u, + generate_series(1, 100) AS e; + +CREATE INDEX merge_test_ts_idx ON merge_test_ts (user_id, event_time); +ANALYZE merge_test_ts; + + +-- ============================================================================ +-- Test 1: Basic integer merge scan +-- Query: WHERE prefix IN (1,2,3) AND suffix >= 50 LIMIT 5 +-- K = 3 prefix values, LIMIT = 5 +-- Expected tuples accessed: 5 + 3 - 1 = 7 +-- ============================================================================ + +SELECT 'Test 1: Basic integer merge scan' AS test_name; + +SELECT * FROM test_btree_merge_scan_int( + 'merge_test_int', + 'merge_test_int_idx', + ARRAY[1, 2, 3], + 50, + 5 +); + + +-- ============================================================================ +-- Test 2: More prefix values +-- Query: WHERE prefix IN (1,2,3,4,5) AND suffix >= 80 LIMIT 3 +-- K = 5 prefix values, LIMIT = 3 +-- Expected tuples accessed: 3 + 5 - 1 = 7 +-- ============================================================================ + +SELECT 'Test 2: More prefix values' AS test_name; + +SELECT * FROM test_btree_merge_scan_int( + 'merge_test_int', + 'merge_test_int_idx', + ARRAY[1, 2, 3, 4, 5], + 80, + 3 +); + + +-- ============================================================================ +-- Test 3: Single prefix value (degenerates to regular scan) +-- K = 1, LIMIT = 5 +-- Expected tuples accessed: 5 + 1 - 1 = 5 +-- ============================================================================ + +SELECT 'Test 3: Single prefix value' AS test_name; + +SELECT * FROM test_btree_merge_scan_int( + 'merge_test_int', + 'merge_test_int_idx', + ARRAY[1], + 50, + 5 +); + + +-- ============================================================================ +-- Test 4: Large LIMIT (more than matching rows) +-- K = 3, prefix values that have 51 rows each (suffix >= 50) +-- LIMIT = 200 but only 153 rows exist +-- ============================================================================ + +SELECT 'Test 4: Large LIMIT' AS test_name; + +SELECT * FROM test_btree_merge_scan_int( + 'merge_test_int', + 'merge_test_int_idx', + ARRAY[1, 2, 3], + 50, + 200 +); + + +-- ============================================================================ +-- Test 5: Non-contiguous prefix values +-- Query: WHERE prefix IN (2,5,8) AND suffix >= 50 LIMIT 5 +-- Tests that merge scan works with gaps in prefix values +-- K = 3 prefix values (non-adjacent), LIMIT = 5 +-- ============================================================================ + +SELECT 'Test 5: Non-contiguous prefix values' AS test_name; + +SELECT * FROM test_btree_merge_scan_int( + 'merge_test_int', + 'merge_test_int_idx', + ARRAY[2, 5, 8], + 50, + 5 +); + + +-- ============================================================================ +-- Test 6: Timestamp suffix column +-- Query: WHERE user_id IN (1,2,3) AND event_time >= '2026-01-01 01:00:00' LIMIT 5 +-- K = 3, LIMIT = 5 +-- Expected tuples accessed: 5 + 3 - 1 = 7 +-- ============================================================================ + +SELECT 'Test 6: Timestamp suffix' AS test_name; + +SELECT * FROM test_btree_merge_scan_ts( + 'merge_test_ts', + 'merge_test_ts_idx', + ARRAY[1, 2, 3], + '2026-01-01 01:00:00'::timestamp, + 5 +); + + +-- ============================================================================ +-- Test 7: All users with timestamp +-- K = 5, LIMIT = 10 +-- Expected tuples accessed: 10 + 5 - 1 = 14 +-- ============================================================================ + +SELECT 'Test 7: All users timestamp' AS test_name; + +SELECT * FROM test_btree_merge_scan_ts( + 'merge_test_ts', + 'merge_test_ts_idx', + ARRAY[1, 2, 3, 4, 5], + '2026-01-01 00:30:00'::timestamp, + 10 +); + + +-- ============================================================================ +-- Test 8: Correctness verification +-- Verify merge scan returns rows in exact ORDER BY suffix_col, prefix_col order +-- Using WITH ORDINALITY to compare row positions +-- ============================================================================ + +SELECT 'Test 8: Correctness verification' AS test_name; + +-- Compare merge scan vs regular query with row positions (should be empty) +WITH merge_result AS ( + SELECT row_number() OVER () AS rn, prefix_col, suffix_col + FROM test_btree_merge_fetch_int( + 'merge_test_int', + 'merge_test_int_idx', + ARRAY[1, 2, 3], + 90, + 10 + ) +), +regular_result AS ( + SELECT row_number() OVER () AS rn, prefix_col, suffix_col + FROM ( + SELECT prefix_col, suffix_col + FROM merge_test_int + WHERE prefix_col IN (1, 2, 3) AND suffix_col >= 90 + ORDER BY suffix_col, prefix_col + LIMIT 10 + ) t +) +SELECT 'MISMATCH' AS status, m.rn, m.prefix_col, m.suffix_col, + r.prefix_col AS expected_prefix, r.suffix_col AS expected_suffix +FROM merge_result m +FULL OUTER JOIN regular_result r ON m.rn = r.rn +WHERE m.prefix_col IS DISTINCT FROM r.prefix_col + OR m.suffix_col IS DISTINCT FROM r.suffix_col; + + +-- ============================================================================ +-- Cleanup +-- ============================================================================ + +DROP TABLE merge_test_int; +DROP TABLE merge_test_ts; +DROP EXTENSION test_btree_merge; diff --git a/src/test/modules/test_btree_merge/test_btree_merge--1.0.sql b/src/test/modules/test_btree_merge/test_btree_merge--1.0.sql new file mode 100644 index 00000000000..9872947d7d7 --- /dev/null +++ b/src/test/modules/test_btree_merge/test_btree_merge--1.0.sql @@ -0,0 +1,43 @@ +/* src/test/modules/test_btree_merge/test_btree_merge--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION test_btree_merge" to load this file. \quit + +-- Test merge scan with integer columns +CREATE FUNCTION test_btree_merge_scan_int( + table_name text, + index_name text, + prefix_values int4[], + suffix_start int4, + limit_count int4 +) RETURNS TABLE ( + tuples_returned int4, + tuples_accessed int4, + maximum_required_fetches int4 +) AS 'MODULE_PATHNAME' LANGUAGE C STRICT; + +-- Fetch actual rows from merge scan (for correctness verification) +CREATE FUNCTION test_btree_merge_fetch_int( + table_name text, + index_name text, + prefix_values int4[], + suffix_start int4, + limit_count int4 +) RETURNS TABLE ( + prefix_col int4, + suffix_col int4 +) AS 'MODULE_PATHNAME' LANGUAGE C STRICT; + +-- Test merge scan with timestamp suffix +CREATE FUNCTION test_btree_merge_scan_ts( + table_name text, + index_name text, + prefix_values int4[], + suffix_start timestamp, + limit_count int4 +) RETURNS TABLE ( + tuples_returned int4, + tuples_accessed int4, + maximum_required_fetches int4 +) AS 'MODULE_PATHNAME' LANGUAGE C STRICT; + diff --git a/src/test/modules/test_btree_merge/test_btree_merge.c b/src/test/modules/test_btree_merge/test_btree_merge.c new file mode 100644 index 00000000000..78b22130ecf --- /dev/null +++ b/src/test/modules/test_btree_merge/test_btree_merge.c @@ -0,0 +1,389 @@ +/*------------------------------------------------------------------------- + * + * test_btree_merge.c + * Unit tests for B-tree Merge Scan implementation + * + * This module provides SQL-callable functions to directly test the + * merge scan algorithm without going through the planner. + * + * Copyright (c) 2026, PostgreSQL Global Development Group + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/heapam.h" +#include "access/nbtree.h" +#include "access/table.h" +#include "catalog/namespace.h" +#include "catalog/pg_am.h" +#include "catalog/pg_type.h" +#include "commands/defrem.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/snapmgr.h" +#include "utils/timestamp.h" + +PG_MODULE_MAGIC; + +#define MAX_RESULTS 10000 + +/* + * MergeScanResult - holds results from a merge scan execution + */ +typedef struct MergeScanResult +{ + int tuples_returned; + int64 tuples_accessed; + int num_prefixes; + int limit_count; + /* For fetch function: collected row data */ + int32 *prefixes; + int32 *suffixes; +} MergeScanResult; + +/* + * do_merge_scan - common merge scan execution + * + * Performs a merge scan with the given parameters and collects results. + * If collect_rows is true, fetches and stores actual row data. + */ +static void +do_merge_scan(const char *table_name, + const char *index_name, + Datum *prefix_values, + bool *prefix_nulls, + int num_prefixes, + Datum suffix_start, + Oid suffix_type, + RegProcedure suffix_eq_proc, + RegProcedure suffix_ge_proc, + int limit_count, + bool collect_rows, + MergeScanResult *result) +{ + Oid table_oid; + Oid index_oid; + Relation heap_rel; + Relation index_rel; + IndexScanDesc scan; + BTScanOpaque so; + BTMergeScanState *merge_state; + Snapshot snapshot; + Oid suffix_cmp_oid; + Oid opfamily; + const char *opfamily_name; + int tuples_returned = 0; + int max_results; + + /* Determine operator family based on suffix type */ + if (suffix_type == INT4OID) + opfamily_name = "integer_ops"; + else if (suffix_type == TIMESTAMPOID) + opfamily_name = "datetime_ops"; + else + elog(ERROR, "unsupported suffix type: %u", suffix_type); + + /* Look up table and index */ + table_oid = RelnameGetRelid(table_name); + if (!OidIsValid(table_oid)) + elog(ERROR, "table \"%s\" does not exist", table_name); + + index_oid = RelnameGetRelid(index_name); + if (!OidIsValid(index_oid)) + elog(ERROR, "index \"%s\" does not exist", index_name); + + /* Open relations */ + heap_rel = table_open(table_oid, AccessShareLock); + index_rel = index_open(index_oid, AccessShareLock); + + /* Get comparison function for suffix type */ + opfamily = get_opfamily_oid(BTREE_AM_OID, + list_make1(makeString(pstrdup(opfamily_name))), + false); + suffix_cmp_oid = get_opfamily_proc(opfamily, suffix_type, suffix_type, + BTORDER_PROC); + if (!OidIsValid(suffix_cmp_oid)) + elog(ERROR, "could not find comparison function for type %u", suffix_type); + + /* Begin index scan */ + snapshot = GetActiveSnapshot(); + scan = index_beginscan(heap_rel, index_rel, snapshot, NULL, 2, 0); + + /* Set up scan keys */ + { + ScanKeyData keys[2]; + + ScanKeyInit(&keys[0], 1, BTEqualStrategyNumber, suffix_eq_proc, + prefix_values[0]); + ScanKeyInit(&keys[1], 2, BTGreaterEqualStrategyNumber, suffix_ge_proc, + suffix_start); + index_rescan(scan, keys, 2, NULL, 0); + } + + so = (BTScanOpaque) scan->opaque; + + /* Initialize merge scan */ + merge_state = bt_merge_init(scan, prefix_values, prefix_nulls, + num_prefixes, 1, 2, suffix_cmp_oid, InvalidOid); + so->mergeState = merge_state; + + /* Execute scan */ + max_results = (limit_count > 0) ? limit_count : MAX_RESULTS; + + while (tuples_returned < max_results) + { + CHECK_FOR_INTERRUPTS(); + + if (!bt_merge_getnext(scan, ForwardScanDirection)) + break; + + if (collect_rows && result->prefixes != NULL) + { + /* Fetch heap tuple to get actual values */ + HeapTupleData heapTuple; + Buffer heapBuffer; + bool isnull; + + heapTuple.t_self = scan->xs_heaptid; + if (heap_fetch(heap_rel, snapshot, &heapTuple, &heapBuffer, false)) + { + result->prefixes[tuples_returned] = + DatumGetInt32(heap_getattr(&heapTuple, 1, + RelationGetDescr(heap_rel), &isnull)); + result->suffixes[tuples_returned] = + DatumGetInt32(heap_getattr(&heapTuple, 2, + RelationGetDescr(heap_rel), &isnull)); + ReleaseBuffer(heapBuffer); + } + } + + tuples_returned++; + + if (tuples_returned >= MAX_RESULTS) + { + elog(WARNING, "merge scan hit safety limit of %d tuples", MAX_RESULTS); + break; + } + } + + /* Collect results before cleanup */ + result->tuples_returned = tuples_returned; + result->tuples_accessed = merge_state->tuples_accessed; + result->num_prefixes = num_prefixes; + result->limit_count = limit_count; + + /* Clean up */ + bt_merge_end(merge_state); + so->mergeState = NULL; + index_endscan(scan); + index_close(index_rel, AccessShareLock); + table_close(heap_rel, AccessShareLock); +} + +/* + * build_stats_result - build the stats result tuple + */ +static Datum +build_stats_result(FunctionCallInfo fcinfo, MergeScanResult *result) +{ + TupleDesc tupdesc; + Datum values[3]; + bool nulls[3] = {false, false, false}; + HeapTuple tuple; + int max_required_fetches; + + /* Calculate expected max fetches */ + if (result->tuples_returned < result->limit_count) + max_required_fetches = result->tuples_returned; + else + max_required_fetches = result->limit_count + result->num_prefixes - 1; + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function returning record called in context " + "that cannot accept type record"))); + + tupdesc = BlessTupleDesc(tupdesc); + + values[0] = Int32GetDatum(result->tuples_returned); + values[1] = Int32GetDatum((int32) result->tuples_accessed); + values[2] = Int32GetDatum(max_required_fetches); + + tuple = heap_form_tuple(tupdesc, values, nulls); + return HeapTupleGetDatum(tuple); +} + + +/* + * test_btree_merge_scan_int - test merge scan with integer columns + */ +PG_FUNCTION_INFO_V1(test_btree_merge_scan_int); + +Datum +test_btree_merge_scan_int(PG_FUNCTION_ARGS) +{ + text *table_name = PG_GETARG_TEXT_PP(0); + text *index_name = PG_GETARG_TEXT_PP(1); + ArrayType *prefix_array = PG_GETARG_ARRAYTYPE_P(2); + int32 suffix_start = PG_GETARG_INT32(3); + int32 limit_count = PG_GETARG_INT32(4); + Datum *prefix_values; + bool *prefix_nulls; + int num_prefixes; + MergeScanResult result = {0}; + + deconstruct_array(prefix_array, INT4OID, sizeof(int32), true, TYPALIGN_INT, + &prefix_values, &prefix_nulls, &num_prefixes); + + if (num_prefixes == 0) + elog(ERROR, "prefix_values array cannot be empty"); + + do_merge_scan(text_to_cstring(table_name), + text_to_cstring(index_name), + prefix_values, prefix_nulls, num_prefixes, + Int32GetDatum(suffix_start), INT4OID, + F_INT4EQ, F_INT4GE, + limit_count, false, &result); + + return build_stats_result(fcinfo, &result); +} + + +/* + * test_btree_merge_scan_ts - test merge scan with timestamp suffix + */ +PG_FUNCTION_INFO_V1(test_btree_merge_scan_ts); + +Datum +test_btree_merge_scan_ts(PG_FUNCTION_ARGS) +{ + text *table_name = PG_GETARG_TEXT_PP(0); + text *index_name = PG_GETARG_TEXT_PP(1); + ArrayType *prefix_array = PG_GETARG_ARRAYTYPE_P(2); + Timestamp suffix_start = PG_GETARG_TIMESTAMP(3); + int32 limit_count = PG_GETARG_INT32(4); + Datum *prefix_values; + bool *prefix_nulls; + int num_prefixes; + MergeScanResult result = {0}; + + deconstruct_array(prefix_array, INT4OID, sizeof(int32), true, TYPALIGN_INT, + &prefix_values, &prefix_nulls, &num_prefixes); + + if (num_prefixes == 0) + elog(ERROR, "prefix_values array cannot be empty"); + + do_merge_scan(text_to_cstring(table_name), + text_to_cstring(index_name), + prefix_values, prefix_nulls, num_prefixes, + TimestampGetDatum(suffix_start), TIMESTAMPOID, + F_INT4EQ, F_TIMESTAMP_GE, + limit_count, false, &result); + + return build_stats_result(fcinfo, &result); +} + + +/* + * test_btree_merge_fetch_int - fetch actual rows from merge scan + */ +PG_FUNCTION_INFO_V1(test_btree_merge_fetch_int); + +Datum +test_btree_merge_fetch_int(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + + typedef struct + { + int32 *prefixes; + int32 *suffixes; + int num_results; + int current_idx; + } FetchContext; + + if (SRF_IS_FIRSTCALL()) + { + text *table_name = PG_GETARG_TEXT_PP(0); + text *index_name = PG_GETARG_TEXT_PP(1); + ArrayType *prefix_array = PG_GETARG_ARRAYTYPE_P(2); + int32 suffix_start = PG_GETARG_INT32(3); + int32 limit_count = PG_GETARG_INT32(4); + Datum *prefix_values; + bool *prefix_nulls; + int num_prefixes; + MemoryContext oldcontext; + FetchContext *fctx; + MergeScanResult result = {0}; + TupleDesc tupdesc; + int max_results; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + deconstruct_array(prefix_array, INT4OID, sizeof(int32), true, TYPALIGN_INT, + &prefix_values, &prefix_nulls, &num_prefixes); + + if (num_prefixes == 0) + elog(ERROR, "prefix_values array cannot be empty"); + + /* Allocate result storage */ + max_results = (limit_count > 0) ? limit_count : MAX_RESULTS; + fctx = palloc(sizeof(FetchContext)); + fctx->prefixes = palloc(max_results * sizeof(int32)); + fctx->suffixes = palloc(max_results * sizeof(int32)); + fctx->current_idx = 0; + + /* Point result to our storage */ + result.prefixes = fctx->prefixes; + result.suffixes = fctx->suffixes; + + do_merge_scan(text_to_cstring(table_name), + text_to_cstring(index_name), + prefix_values, prefix_nulls, num_prefixes, + Int32GetDatum(suffix_start), INT4OID, + F_INT4EQ, F_INT4GE, + limit_count, true, &result); + + fctx->num_results = result.tuples_returned; + + /* Build result tuple descriptor */ + tupdesc = CreateTemplateTupleDesc(2); + TupleDescInitEntry(tupdesc, 1, "prefix_col", INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, 2, "suffix_col", INT4OID, -1, 0); + funcctx->tuple_desc = BlessTupleDesc(tupdesc); + funcctx->user_fctx = fctx; + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + + { + FetchContext *fctx = funcctx->user_fctx; + + if (fctx->current_idx < fctx->num_results) + { + Datum values[2]; + bool nulls[2] = {false, false}; + HeapTuple tuple; + + values[0] = Int32GetDatum(fctx->prefixes[fctx->current_idx]); + values[1] = Int32GetDatum(fctx->suffixes[fctx->current_idx]); + fctx->current_idx++; + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); + } + else + { + SRF_RETURN_DONE(funcctx); + } + } +} diff --git a/src/test/modules/test_btree_merge/test_btree_merge.control b/src/test/modules/test_btree_merge/test_btree_merge.control new file mode 100644 index 00000000000..f8146bd0f74 --- /dev/null +++ b/src/test/modules/test_btree_merge/test_btree_merge.control @@ -0,0 +1,5 @@ +# test_btree_merge extension +comment = 'Unit tests for B-tree merge scan' +default_version = '1.0' +module_pathname = '$libdir/test_btree_merge' +relocatable = true -- 2.40.0