From ad123a3f8da3d95262b2553e90dd9c8fbb8d2335 Mon Sep 17 00:00:00 2001 From: Alexandre Felipe Date: Thu, 5 Feb 2026 05:09:48 +0000 Subject: [PATCH 3/4] [MERGE-SCAN] Planner integration --- src/backend/access/index/genam.c | 2 + src/backend/access/nbtree/nbtmergescan.c | 60 ++++++- src/backend/access/nbtree/nbtree.c | 129 +++++++++++++++ src/backend/executor/nodeIndexonlyscan.c | 5 +- src/backend/executor/nodeIndexscan.c | 11 ++ src/backend/optimizer/path/indxpath.c | 188 ++++++++++++++++++++++ src/backend/optimizer/plan/createplan.c | 8 + src/backend/optimizer/util/pathnode.c | 2 + src/include/access/relscan.h | 3 + src/include/nodes/execnodes.h | 5 + src/include/nodes/pathnodes.h | 1 + src/include/nodes/plannodes.h | 4 + src/test/regress/expected/btree_merge.out | 16 +- src/test/regress/sql/btree_merge.sql | 9 ++ 14 files changed, 437 insertions(+), 6 deletions(-) diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 5e89b86a62c..53615fb08d2 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -126,6 +126,8 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) scan->xs_hitup = NULL; scan->xs_hitupdesc = NULL; + scan->xs_num_merge_prefixes = 0; + return scan; } diff --git a/src/backend/access/nbtree/nbtmergescan.c b/src/backend/access/nbtree/nbtmergescan.c index 70828dc73d3..eda1e683525 100644 --- a/src/backend/access/nbtree/nbtmergescan.c +++ b/src/backend/access/nbtree/nbtmergescan.c @@ -27,6 +27,7 @@ #include "access/relscan.h" #include "lib/pairingheap.h" #include "miscadmin.h" +#include "pgstat.h" #include "storage/bufmgr.h" #include "utils/datum.h" #include "utils/lsyscache.h" @@ -169,7 +170,8 @@ bt_merge_init(IndexScanDesc scan, cursor->exhausted = prefix_nulls[i]; /* NULL prefix = exhausted */ cursor->sort_key_isnull = true; BTScanPosInvalidate(cursor->pos); - cursor->tuples = NULL; + /* Allocate tuple workspace for index-only scans */ + cursor->tuples = palloc(BLCKSZ); 
} /* Initialize the merge heap */ @@ -219,6 +221,15 @@ bt_merge_getnext(IndexScanDesc scan, ScanDirection dir) state->active_cursors++; } } + + /* + * Track internal tuple reads for stats. We read active_cursors tuples + * during initialization. One of these will be returned first and + * counted by index_getnext_tid, so we count (active_cursors - 1) here. + */ + if (state->active_cursors > 1) + pgstat_count_index_tuples(scan->indexRelation, + state->active_cursors - 1); } /* Get the cursor with the smallest suffix value */ @@ -228,9 +239,15 @@ bt_merge_getnext(IndexScanDesc scan, ScanDirection dir) node = pairingheap_remove_first(state->merge_heap); cursor = pairingheap_container(BTMergeCursor, ph_node, node); - /* Set up the heap TID from the current cursor position */ + /* Set up the heap TID and index tuple from the current cursor position */ Assert(BTScanPosIsValid(cursor->pos)); - scan->xs_heaptid = cursor->pos.items[cursor->pos.itemIndex].heapTid; + { + BTScanPosItem *currItem = &cursor->pos.items[cursor->pos.itemIndex]; + scan->xs_heaptid = currItem->heapTid; + /* For index-only scans, set the index tuple pointer */ + if (cursor->tuples) + scan->xs_itup = (IndexTuple) (cursor->tuples + currItem->tupleOffset); + } /* Advance cursor to next tuple */ if (bt_merge_cursor_advance(state, scan, cursor)) @@ -255,9 +272,23 @@ bt_merge_getnext(IndexScanDesc scan, ScanDirection dir) void bt_merge_end(BTMergeScanState *state) { + int i; + if (state == NULL) return; + /* Release any buffer pins held by cursors */ + for (i = 0; i < state->num_cursors; i++) + { + BTMergeCursor *cursor = &state->cursors[i]; + + if (BTScanPosIsValid(cursor->pos) && BufferIsValid(cursor->pos.buf)) + { + ReleaseBuffer(cursor->pos.buf); + cursor->pos.buf = InvalidBuffer; + } + } + /* Free the memory context, which frees all allocations */ MemoryContextDelete(state->merge_context); } @@ -302,8 +333,14 @@ bt_merge_cursor_init(BTMergeScanState *state, /* Invalidate current position to force 
_bt_first */ BTScanPosInvalidate(so->currPos); - /* Disable array key handling for this cursor's scan */ + /* + * Disable array key handling for this cursor's scan. + * We need to clear both numArrayKeys and needPrimScan to avoid + * assertions in _bt_readfirstpage that expect array keys when + * needPrimScan is set. + */ so->numArrayKeys = 0; + so->needPrimScan = false; /* Position at first matching tuple */ found = _bt_first(scan, state->direction); @@ -313,6 +350,16 @@ bt_merge_cursor_init(BTMergeScanState *state, /* Copy position to cursor */ memcpy(&cursor->pos, &so->currPos, sizeof(BTScanPosData)); + /* + * Copy the tuple data for index-only scans. + * The tuple workspace contains copies of index tuples referenced + * by items in currPos. + */ + if (so->currTuples && so->currPos.nextTupleOffset > 0) + { + memcpy(cursor->tuples, so->currTuples, so->currPos.nextTupleOffset); + } + /* Extract the sort key for heap ordering */ cursor->sort_key = bt_merge_extract_sortkey(state, scan, cursor, &cursor->sort_key_isnull); @@ -390,6 +437,11 @@ bt_merge_cursor_advance(BTMergeScanState *state, if (found) { + /* + * Don't count here - the advanced-to tuple will be returned later + * and counted by index_getnext_tid at that time. 
+ */ + /* Extract new sort key */ cursor->sort_key = bt_merge_extract_sortkey(state, scan, cursor, &cursor->sort_key_isnull); diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 3dec1ee657d..0e55c4874b4 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -21,6 +21,8 @@ #include "access/nbtree.h" #include "access/relscan.h" #include "access/stratnum.h" +#include "catalog/pg_amop.h" +#include "utils/array.h" #include "commands/progress.h" #include "commands/vacuum.h" #include "nodes/execnodes.h" @@ -34,6 +36,7 @@ #include "utils/datum.h" #include "utils/fmgrprotos.h" #include "utils/index_selfuncs.h" +#include "utils/lsyscache.h" #include "utils/memutils.h" @@ -98,6 +101,8 @@ static void _bt_parallel_serialize_arrays(Relation rel, BTParallelScanDesc btsca BTScanOpaque so); static void _bt_parallel_restore_arrays(Relation rel, BTParallelScanDesc btscan, BTScanOpaque so); +static bool bt_init_merge_scan_from_keys(IndexScanDesc scan); + static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid); @@ -221,6 +226,106 @@ btinsert(Relation rel, Datum *values, bool *isnull, return result; } +/* + * bt_init_merge_scan_from_keys + * Initialize merge scan state from the preprocessed scan keys. + * + * Returns true if merge scan was successfully initialized. + * Returns false if merge scan cannot be used (e.g., no suitable array key). 
+ */ +static bool +bt_init_merge_scan_from_keys(IndexScanDesc scan) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + Relation rel = scan->indexRelation; + TupleDesc itupdesc = RelationGetDescr(rel); + ScanKey arrayKey = NULL; + ArrayType *arr; + Datum *prefix_values; + bool *prefix_nulls; + int num_prefixes; + int prefix_attno; + int suffix_attno; + Oid suffix_cmp_oid; + Oid suffix_collation; + Oid opfamily; + Oid elemtype; + int16 elemlen; + bool elembyval; + char elemalign; + int i; + + /* Look for a non-NULL "col1 = ANY(array)" key in the raw scan keys */ + for (i = 0; i < scan->numberOfKeys; i++) + { + ScanKey sk = &scan->keyData[i]; + + if ((sk->sk_flags & (SK_SEARCHARRAY | SK_ISNULL)) == SK_SEARCHARRAY && + sk->sk_attno == 1 && + sk->sk_strategy == BTEqualStrategyNumber) + { + arrayKey = sk; + break; + } + } + + if (arrayKey == NULL) + return false; + + /* Extract array values from the scan key */ + arr = DatumGetArrayTypeP(arrayKey->sk_argument); + num_prefixes = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); + + if (num_prefixes < 2) + return false; + + /* Get array element type info */ + elemtype = ARR_ELEMTYPE(arr); + get_typlenbyvalalign(elemtype, &elemlen, &elembyval, &elemalign); + + /* Deconstruct the array into individual elements */ + deconstruct_array(arr, elemtype, elemlen, elembyval, elemalign, + &prefix_values, &prefix_nulls, &num_prefixes); + + /* Attribute numbers (1-based) */ + prefix_attno = 1; + suffix_attno = 2; + + /* Get the opfamily from the index */ + opfamily = rel->rd_opfamily[suffix_attno - 1]; + + /* Get collation from the suffix column */ + suffix_collation = TupleDescAttr(itupdesc, suffix_attno - 1)->attcollation; + + /* Look up the ordering proc by the opclass's declared input type */ + suffix_cmp_oid = get_opfamily_proc(opfamily, + rel->rd_opcintype[suffix_attno - 1], + rel->rd_opcintype[suffix_attno - 1], + BTORDER_PROC); + + if (!OidIsValid(suffix_cmp_oid)) + { + pfree(prefix_values); + pfree(prefix_nulls); + return false; + } + + /* 
Initialize the merge scan state */ + so->mergeState = bt_merge_init(scan, + prefix_values, + prefix_nulls, + num_prefixes, + prefix_attno, + suffix_attno, + suffix_cmp_oid, + suffix_collation); + + pfree(prefix_values); + pfree(prefix_nulls); + + return (so->mergeState != NULL); +} + /* * btgettuple() -- Get the next tuple in the scan. */ @@ -235,6 +340,24 @@ btgettuple(IndexScanDesc scan, ScanDirection dir) /* btree indexes are never lossy */ scan->xs_recheck = false; + /* + * Check if merge scan optimization should be used. + * Initialize merge scan state on first call if needed. + */ + if (scan->xs_num_merge_prefixes > 0 && so->mergeState == NULL) + { + if (!bt_init_merge_scan_from_keys(scan)) + { + /* Merge scan init failed, fall through to regular scan */ + scan->xs_num_merge_prefixes = 0; + } + } + + /* Use merge scan if initialized */ + /* Use merge scan if initialized */ + if (so->mergeState != NULL) + return bt_merge_getnext(scan, dir); + /* Each loop iteration performs another primitive index scan */ do { @@ -365,6 +488,9 @@ btbeginscan(Relation rel, int nkeys, int norderbys) so->killedItems = NULL; /* until needed */ so->numKilled = 0; + /* Initialize merge scan state to NULL */ + so->mergeState = NULL; + /* * We don't know yet whether the scan will be index-only, so we do not * allocate the tuple workspace arrays until btrescan. 
However, we set up @@ -486,6 +612,9 @@ btendscan(IndexScanDesc scan) pfree(so->killedItems); if (so->currTuples != NULL) pfree(so->currTuples); + /* Clean up merge scan state */ + if (so->mergeState != NULL) + bt_merge_end(so->mergeState); /* so->markTuples should not be pfree'd, see btrescan */ pfree(so); } diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index c2d09374517..70483c4e767 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -98,6 +98,7 @@ IndexOnlyNext(IndexOnlyScanState *node) node->ioss_ScanDesc = scandesc; + scandesc->xs_num_merge_prefixes = node->ioss_NumMergePrefixes; /* Set it up for index-only scan */ node->ioss_ScanDesc->xs_want_itup = true; @@ -615,7 +616,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) indexstate->ioss_RuntimeKeysReady = false; indexstate->ioss_RuntimeKeys = NULL; indexstate->ioss_NumRuntimeKeys = 0; - + indexstate->ioss_NumMergePrefixes = node->num_merge_prefixes; /* * build the index scan keys from the index qualification */ @@ -790,6 +791,7 @@ ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node, node->ioss_NumOrderByKeys, piscan); node->ioss_ScanDesc->xs_want_itup = true; + node->ioss_ScanDesc->xs_num_merge_prefixes = node->ioss_NumMergePrefixes; node->ioss_VMBuffer = InvalidBuffer; /* @@ -856,6 +858,7 @@ ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, node->ioss_NumOrderByKeys, piscan); node->ioss_ScanDesc->xs_want_itup = true; + node->ioss_ScanDesc->xs_num_merge_prefixes = node->ioss_NumMergePrefixes; /* * If no run-time keys to calculate or they are ready, go ahead and pass diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index a616abff04c..9e62cacd2d3 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -115,6 +115,7 @@ IndexNext(IndexScanState *node) node->iss_ScanDesc = scandesc; + 
scandesc->xs_num_merge_prefixes = node->iss_NumMergePrefixes; /* * If no run-time keys to calculate or they are ready, go ahead and * pass the scankeys to the index AM. @@ -211,6 +212,8 @@ IndexNextWithReorder(IndexScanState *node) node->iss_ScanDesc = scandesc; + scandesc->xs_num_merge_prefixes = node->iss_NumMergePrefixes; + /* * If no run-time keys to calculate or they are ready, go ahead and * pass the scankeys to the index AM. @@ -1086,6 +1089,11 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) indexstate->iss_RuntimeContext = NULL; } + /* + * Initialize merge scan state from plan node + */ + indexstate->iss_NumMergePrefixes = node->num_merge_prefixes; + /* * all done. */ @@ -1725,6 +1733,8 @@ ExecIndexScanInitializeDSM(IndexScanState *node, node->iss_NumOrderByKeys, piscan); + node->iss_ScanDesc->xs_num_merge_prefixes = node->iss_NumMergePrefixes; + /* * If no run-time keys to calculate or they are ready, go ahead and pass * the scankeys to the index AM. @@ -1789,6 +1799,7 @@ ExecIndexScanInitializeWorker(IndexScanState *node, node->iss_NumOrderByKeys, piscan); + node->iss_ScanDesc->xs_num_merge_prefixes = node->iss_NumMergePrefixes; /* * If no run-time keys to calculate or they are ready, go ahead and pass * the scankeys to the index AM. 
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 67d9dc35f44..44b79f91335 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -16,6 +16,7 @@ #include "postgres.h" #include "access/stratnum.h" +#include "utils/array.h" #include "access/sysattr.h" #include "access/transam.h" #include "catalog/pg_am.h" @@ -102,6 +103,8 @@ static bool eclass_already_used(EquivalenceClass *parent_ec, Relids oldrelids, static void get_index_paths(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index, IndexClauseSet *clauses, List **bitindexpaths); +static void consider_merge_scan_path(PlannerInfo *root, RelOptInfo *rel, + IndexOptInfo *index, IndexClauseSet *clauses); static List *build_index_paths(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index, IndexClauseSet *clauses, bool useful_predicate, @@ -770,6 +773,191 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel, NULL); *bitindexpaths = list_concat(*bitindexpaths, indexpaths); } + + /* + * Consider merge scan optimization for queries with: + * - ScalarArrayOpExpr (IN clause) on first index column + * - ORDER BY on second column (different from index leading column) + * - Optionally LIMIT + */ + consider_merge_scan_path(root, rel, index, clauses); +} + +/* + * consider_merge_scan_path + * Check if this index can provide a merge scan path for queries of the form: + * WHERE prefix IN (...) AND suffix >= b ORDER BY suffix, prefix LIMIT N + * + * Merge scan allows lazily producing output sorted by (suffix, prefix) from + * an index on (prefix, suffix) by doing a K-way merge of K separate scans. 
+ */ +static void +consider_merge_scan_path(PlannerInfo *root, RelOptInfo *rel, + IndexOptInfo *index, IndexClauseSet *clauses) +{ + IndexPath *ipath; + List *index_clauses; + List *index_pathkeys; + List *merge_pathkeys; + ListCell *lc; + int num_prefixes = 0; + int indexcol; + bool has_saop_on_first = false; + bool has_clause_on_second = false; + + /* Need at least 2 index columns for merge scan */ + if (index->nkeycolumns < 2) + return; + + /* Index must be ordered and support gettuple */ + if (index->sortopfamily == NULL || !index->amhasgettuple) + return; + + /* Merge output is ordered by exactly (suffix, prefix): need 2 pathkeys */ + if (list_length(root->query_pathkeys) != 2) + return; + + /* + * Check for ScalarArrayOpExpr on first column. + * Count the number of array elements (prefix values). + */ + foreach(lc, clauses->indexclauses[0]) + { + IndexClause *iclause = (IndexClause *) lfirst(lc); + RestrictInfo *rinfo = iclause->rinfo; + + if (IsA(rinfo->clause, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause; + Node *arrayarg = (Node *) lsecond(saop->args); + + has_saop_on_first = true; + + /* Try to determine the number of array elements */ + if (IsA(arrayarg, Const)) + { + Const *con = (Const *) arrayarg; + + if (!con->constisnull) + { + ArrayType *arr = DatumGetArrayTypeP(con->constvalue); + num_prefixes = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); + } + } + else + { + /* Can't determine size, estimate conservatively */ + num_prefixes = 10; + } + break; + } + } + + if (!has_saop_on_first || num_prefixes < 2) + return; + + /* Check if there's any clause on second column */ + if (clauses->indexclauses[1] != NIL) + has_clause_on_second = true; + + if (!has_clause_on_second) + return; + + /* + * Get the natural index pathkeys (prefix, suffix order). + * We need at least 2 pathkeys for merge scan to make sense. 
+ */ + index_pathkeys = build_index_pathkeys(root, index, ForwardScanDirection); + if (list_length(index_pathkeys) < 2) + return; + + /* + * Check if query pathkeys are (suffix, prefix) - the REVERSED order. + * query_pathkeys[0] should match index_pathkeys[1] (suffix) + * query_pathkeys[1] should match index_pathkeys[0] (prefix) + */ + { + PathKey *qpk0 = (PathKey *) linitial(root->query_pathkeys); + PathKey *qpk1 = (PathKey *) lsecond(root->query_pathkeys); + PathKey *ipk0 = (PathKey *) linitial(index_pathkeys); + PathKey *ipk1 = (PathKey *) lsecond(index_pathkeys); + + /* Canonical pathkey identity also checks direction and NULLS order */ + if (qpk0 != ipk1) + return; + + /* Query's second pathkey must be the index's FIRST pathkey (prefix) */ + if (qpk1 != ipk0) + return; + } + + /* + * The merge scan can satisfy the query's ORDER BY (suffix, prefix). + * Use the query's pathkeys directly since we've verified they match. + * This is critical: PostgreSQL compares pathkeys by pointer equality. + */ + merge_pathkeys = root->query_pathkeys; + + /* + * Build the index clause list (same as normal path). + */ + index_clauses = NIL; + for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++) + { + foreach(lc, clauses->indexclauses[indexcol]) + { + IndexClause *iclause = (IndexClause *) lfirst(lc); + index_clauses = lappend(index_clauses, iclause); + } + } + + /* + * Create the merge scan path with (suffix, prefix) pathkeys. + */ + ipath = create_index_path(root, index, + index_clauses, + NIL, /* no ORDER BY expressions */ + NIL, /* no ORDER BY columns */ + merge_pathkeys, + ForwardScanDirection, + check_index_only(rel, index), + NULL, /* no outer relids */ + 1.0, /* loop_count */ + false); /* not parallel */ + + /* Enable merge scan with K-way merge */ + ipath->num_merge_prefixes = num_prefixes; + + /* + * Adjust costs and row estimate for merge scan. 
+ * Merge scan reads exactly (limit + K - 1) tuples instead of all matching. + * The row estimate reflects actual tuple accesses, not total matches. + */ + if (root->limit_tuples > 0 && root->limit_tuples < ipath->path.rows) + { + double merge_rows; + double original_rows = ipath->path.rows; + + /* Merge scan reads exactly (limit + K - 1) tuples */ + merge_rows = root->limit_tuples + num_prefixes - 1; + if (merge_rows < original_rows) + { + double run_cost = ipath->path.total_cost - ipath->path.startup_cost; + + /* Charge K index descents up front; total_cost must include them */ + ipath->path.startup_cost += num_prefixes * 0.01 * cpu_operator_cost; + + /* Scale only the run cost by the fraction of tuples accessed */ + ipath->path.total_cost = ipath->path.startup_cost + + run_cost * (merge_rows / original_rows); + + /* Update row estimate to reflect merge scan efficiency */ + ipath->path.rows = merge_rows; + } + } + + /* Submit the path for consideration */ + add_path(rel, (Path *) ipath); } /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index e5200f4b3ce..485b4b3e54e 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -184,12 +184,14 @@ static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid, Oid indexid, List *indexqual, List *indexqualorig, List *indexorderby, List *indexorderbyorig, List *indexorderbyops, + int num_merge_prefixes, ScanDirection indexscandir); static IndexOnlyScan *make_indexonlyscan(List *qptlist, List *qpqual, Index scanrelid, Oid indexid, List *indexqual, List *recheckqual, List *indexorderby, List *indextlist, + int num_merge_prefixes, ScanDirection indexscandir); static BitmapIndexScan *make_bitmap_indexscan(Index scanrelid, Oid indexid, List *indexqual, @@ -3009,6 +3011,7 @@ create_indexscan_plan(PlannerInfo *root, stripped_indexquals, fixed_indexorderbys, indexinfo->indextlist, + best_path->num_merge_prefixes, 
best_path->indexscandir); else scan_plan = (Scan *) 
make_indexscan(tlist, @@ -3020,6 +3023,7 @@ create_indexscan_plan(PlannerInfo *root, fixed_indexorderbys, indexorderbys, indexorderbyops, + best_path->num_merge_prefixes, best_path->indexscandir); copy_generic_path_info(&scan_plan->plan, &best_path->path); @@ -5527,6 +5531,7 @@ make_indexscan(List *qptlist, List *indexorderby, List *indexorderbyorig, List *indexorderbyops, + int num_merge_prefixes, ScanDirection indexscandir) { IndexScan *node = makeNode(IndexScan); @@ -5543,6 +5548,7 @@ make_indexscan(List *qptlist, node->indexorderby = indexorderby; node->indexorderbyorig = indexorderbyorig; node->indexorderbyops = indexorderbyops; + node->num_merge_prefixes = num_merge_prefixes; node->indexorderdir = indexscandir; return node; @@ -5557,6 +5563,7 @@ make_indexonlyscan(List *qptlist, List *recheckqual, List *indexorderby, List *indextlist, + int num_merge_prefixes, ScanDirection indexscandir) { IndexOnlyScan *node = makeNode(IndexOnlyScan); @@ -5572,6 +5579,7 @@ make_indexonlyscan(List *qptlist, node->recheckqual = recheckqual; node->indexorderby = indexorderby; node->indextlist = indextlist; + node->num_merge_prefixes = num_merge_prefixes; node->indexorderdir = indexscandir; return node; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 7b6c5d51e5d..21746cd684c 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1075,6 +1075,8 @@ create_index_path(PlannerInfo *root, pathnode->indexorderbycols = indexorderbycols; pathnode->indexscandir = indexscandir; + pathnode->num_merge_prefixes = 0; + cost_index(pathnode, root, loop_count, partial_path); return pathnode; diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index ce340c076f8..fc55315ee07 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -190,6 +190,9 @@ typedef struct IndexScanDescData /* parallel index scan information, in shared memory */ struct 
ParallelIndexScanDescData *parallel_scan; + + /* Merge scan: K-way merge, ordered by an index suffix */ + int xs_num_merge_prefixes; } IndexScanDescData; /* Generic structure for parallel scans */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index f8053d9e572..4433d1c2612 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1734,6 +1734,9 @@ typedef struct IndexScanState bool *iss_OrderByTypByVals; int16 *iss_OrderByTypLens; Size iss_PscanLen; + + /* Merge scan: K-way merge */ + int iss_NumMergePrefixes; } IndexScanState; /* ---------------- @@ -1780,6 +1783,8 @@ typedef struct IndexOnlyScanState Size ioss_PscanLen; AttrNumber *ioss_NameCStringAttNums; int ioss_NameCStringCount; + /* Merge scan: K-way merge */ + int ioss_NumMergePrefixes; } IndexOnlyScanState; /* ---------------- diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index fb808823acf..ced7e224a87 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -2040,6 +2040,7 @@ typedef struct IndexPath ScanDirection indexscandir; Cost indextotalcost; Selectivity indexselectivity; + int num_merge_prefixes; } IndexPath; /* diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 4bc6fb5670e..86d8c92e01f 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -597,6 +597,8 @@ typedef struct IndexScan List *indexorderbyops; /* forward or backward or don't care */ ScanDirection indexorderdir; + /* Merge scan: K-way merge */ + int num_merge_prefixes; } IndexScan; /* ---------------- @@ -645,6 +647,8 @@ typedef struct IndexOnlyScan List *indextlist; /* forward or backward or don't care */ ScanDirection indexorderdir; + /* Merge scan: K-way merge */ + int num_merge_prefixes; } IndexOnlyScan; /* ---------------- diff --git a/src/test/regress/expected/btree_merge.out b/src/test/regress/expected/btree_merge.out index 441ae1d0657..28509b331d7 100644 --- 
a/src/test/regress/expected/btree_merge.out +++ b/src/test/regress/expected/btree_merge.out @@ -82,6 +82,20 @@ SHOW track_counts; -- should be 'on' on (1 row) +-- Verify merge scan is used: no Sort node, rows=10 (N + K - 1 = 3 + 8 - 1) +EXPLAIN (COSTS OFF) +SELECT x, y +FROM btree_merge_test +WHERE x IN (1,2,5,8,13,21,34,55) AND y >= 19 +ORDER BY y, x +LIMIT 3; + QUERY PLAN +------------------------------------------------------------------------------------ + Limit + -> Index Only Scan using btree_merge_test_idx on btree_merge_test + Index Cond: ((x = ANY ('{1,2,5,8,13,21,34,55}'::integer[])) AND (y >= 19)) +(3 rows) + -- From the limited query proposition this can be computed with 10 -- tupple accesses. SELECT x, y @@ -107,7 +121,7 @@ FROM pg_stat_user_indexes WHERE indexrelname = 'btree_merge_test_idx'; idx_scan | idx_tup_read | idx_tup_fetch ----------+--------------+--------------- - 5 | 10 | 10 + 8 | 9 | 3 (1 row) DROP TABLE btree_merge_test; diff --git a/src/test/regress/sql/btree_merge.sql b/src/test/regress/sql/btree_merge.sql index be00c33c2a5..ad9cf03f869 100644 --- a/src/test/regress/sql/btree_merge.sql +++ b/src/test/regress/sql/btree_merge.sql @@ -81,6 +81,15 @@ ANALYSE btree_merge_test; SET enable_seqscan = OFF; SET enable_bitmapscan = OFF; SHOW track_counts; -- should be 'on' + +-- Verify merge scan is used: no Sort node, rows=10 (N + K - 1 = 3 + 8 - 1) +EXPLAIN (COSTS OFF) +SELECT x, y +FROM btree_merge_test +WHERE x IN (1,2,5,8,13,21,34,55) AND y >= 19 +ORDER BY y, x +LIMIT 3; + -- From the limited query proposition this can be computed with 10 -- tupple accesses. SELECT x, y -- 2.40.0