From deb986653ba56f0580b03484e3f4a22716bac9f7 Mon Sep 17 00:00:00 2001 From: Maxime Schoemans Date: Mon, 13 Apr 2026 16:06:03 +0200 Subject: [PATCH v7 2/3] Improve multirange join selectivity estimation for <<, >>, && Add multirangejoinsel() to estimate join selectivity for multirange operators using bound histograms, covering all type combinations: multirange vs multirange, multirange vs range, range vs multirange. Note that multirange statistics only represent the outermost bounds (see multirange_typanalyze), so && may overestimate overlap for sparse multiranges. This is consistent with how existing restriction selectivity handles multirange &&. The shared helper functions (calc_hist_join_selectivity and others) are intentionally duplicated from rangetypes_selfuncs.c for reviewability. A follow-up commit will remove the duplication. --- .../utils/adt/multirangetypes_selfuncs.c | 325 ++++++++++++++++++ src/include/catalog/pg_operator.dat | 18 +- src/include/catalog/pg_proc.dat | 4 + src/test/regress/expected/multirangetypes.out | 157 +++++++++ src/test/regress/sql/multirangetypes.sql | 72 ++++ 5 files changed, 567 insertions(+), 9 deletions(-) diff --git a/src/backend/utils/adt/multirangetypes_selfuncs.c b/src/backend/utils/adt/multirangetypes_selfuncs.c index 533111445e7..241f8c6dbe0 100644 --- a/src/backend/utils/adt/multirangetypes_selfuncs.c +++ b/src/backend/utils/adt/multirangetypes_selfuncs.c @@ -1334,3 +1334,328 @@ calc_hist_selectivity_contains(TypeCacheEntry *typcache, return sum_frac; } + +/* + * Estimate join selectivity P(X < Y) using rangebound histograms. + * + * Based on: Diogo Repas, Zhicheng Luo, Maxime Schoemans, Mahmoud Sakr, 2022 + * "Selectivity Estimation of Inequality Joins In Databases" + * https://doi.org/10.48550/arXiv.2206.07396 + * + * hist1 and hist2 are arrays of RangeBound entries from the bounds histograms + * of two range-typed or multirange-typed attributes X and Y, respectively. 
+ * Each array has at least 2 entries (one histogram bin). The entries carry + * full bound metadata (lower/upper flag, inclusive/exclusive), and all + * comparisons use range_cmp_bounds() so that bound semantics are preserved. + * + * The algorithm models each attribute's distribution as a piecewise function + * derived from its histogram, then computes: + * P(X < Y) = 0.5 * sum( (F_X(prev) + F_X(cur)) * (F_Y(cur) - F_Y(prev)) ) + * by parallel-scanning both histograms. + * + * The initial fast-forward loops skip histogram entries that fall entirely + * before the other histogram's range, so the main loop only processes the + * overlapping region. Bounds checks are required because the histograms may + * be completely disjoint (e.g., all of X is below all of Y). + */ +static double +calc_hist_join_selectivity(TypeCacheEntry *typcache, + const RangeBound *hist1, int nhist1, + const RangeBound *hist2, int nhist2) +{ + int i, + j; + double selectivity = 0.0; + double prev_sel1 = -1.0; /* negative sentinel skips first iter */ + double prev_sel2 = 0.0; + + Assert(nhist1 > 1); + Assert(nhist2 > 1); + + /* + * Fast-forward past hist1 entries that are entirely below hist2[0], and + * vice versa. Bounds checks prevent out-of-bounds access when the + * histograms are fully disjoint. + */ + for (i = 0; i < nhist1 && + range_cmp_bounds(typcache, &hist1[i], &hist2[0]) < 0; i++) + ; + for (j = 0; j < nhist2 && + range_cmp_bounds(typcache, &hist2[j], &hist1[0]) < 0; j++) + ; + + /* + * Handle fully-separated histograms. When all bounds in hist1 are below + * all bounds in hist2, P(X < Y) is ~1.0. When all of hist2 is below + * hist1, P(X < Y) is ~0.0. We return immediately rather than falling + * into the overlap walk with invalid indices. 
+ */ + if (i >= nhist1) + return 1.0; + if (j >= nhist2) + return 0.0; + + /* Walk the overlapping region of both histograms */ + while (i < nhist1 && j < nhist2) + { + double cur_sel1, + cur_sel2; + RangeBound cur_sync; + int cmp; + + cmp = range_cmp_bounds(typcache, &hist1[i], &hist2[j]); + if (cmp < 0) + cur_sync = hist1[i++]; + else if (cmp > 0) + cur_sync = hist2[j++]; + else + { + /* Equal bounds: advance both */ + cur_sync = hist1[i]; + i++; + j++; + } + cur_sel1 = calc_hist_selectivity_scalar(typcache, &cur_sync, + hist1, nhist1, false); + cur_sel2 = calc_hist_selectivity_scalar(typcache, &cur_sync, + hist2, nhist2, false); + + /* Skip the first iteration (no previous point yet) */ + if (prev_sel1 >= 0) + selectivity += (prev_sel1 + cur_sel1) * (cur_sel2 - prev_sel2); + + prev_sel1 = cur_sel1; + prev_sel2 = cur_sel2; + } + + /* P(X < Y) = 0.5 * Sum(...) */ + selectivity /= 2; + + /* If hist1 ran out first, the rest of hist2 lies above every X bound */ + if (j < nhist2) + selectivity += 1 - prev_sel2; + + return selectivity; +} + +/* + * multirangejoinsel -- join selectivity for multirange operators + * + * Supports: <<, >>, && for all type combinations: + * multirange vs multirange, multirange vs range, range vs multirange + * + * These operators all reduce to strict bound comparisons P(X < Y), + * which calc_hist_join_selectivity() estimates from bound histograms. + * Both range and multirange types store bound histograms in the same + * format, so the estimation is identical regardless of type combination. 
+ */ +Datum +multirangejoinsel(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); + VariableStatData vardata1; + VariableStatData vardata2; + Selectivity selec; + AttStatsSlot hist1; + AttStatsSlot hist2; + AttStatsSlot sslot; + bool have_hist1 = false; + bool have_hist2 = false; + TypeCacheEntry *typcache; + TypeCacheEntry *rng_typcache; + Form_pg_statistic stats1; + Form_pg_statistic stats2; + double empty_frac1; + double empty_frac2; + double null_frac1; + double null_frac2; + int nhist1; + int nhist2; + RangeBound *hist1_lower; + RangeBound *hist1_upper; + RangeBound *hist2_lower; + RangeBound *hist2_upper; + bool join_is_reversed; + bool empty; + int i; + + get_join_variables(root, args, sjinfo, &vardata1, &vardata2, + &join_is_reversed); + + selec = default_multirange_selectivity(operator); + + /* + * Acquire histogram stats for both sides. Each slot is tracked + * independently so we can release exactly what was acquired on any + * failure path. + */ + if (!HeapTupleIsValid(vardata1.statsTuple) || + !HeapTupleIsValid(vardata2.statsTuple)) + goto cleanup; + + memset(&hist1, 0, sizeof(hist1)); + memset(&hist2, 0, sizeof(hist2)); + + if (!get_attstatsslot(&hist1, vardata1.statsTuple, + STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid, + ATTSTATSSLOT_VALUES)) + goto cleanup; + have_hist1 = true; + + if (!get_attstatsslot(&hist2, vardata2.statsTuple, + STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid, + ATTSTATSSLOT_VALUES)) + goto cleanup; + have_hist2 = true; + + /* + * Determine the range type cache for bound comparisons. At least one + * side is a multirange type; try vardata1 first, then vardata2. 
+ */ + typcache = lookup_type_cache(vardata1.vartype, TYPECACHE_MULTIRANGE_INFO); + if (typcache->rngtype != NULL) + rng_typcache = typcache->rngtype; + else + { + typcache = lookup_type_cache(vardata2.vartype, + TYPECACHE_MULTIRANGE_INFO); + rng_typcache = typcache->rngtype; + } + + /* Look up NULL and empty-range fractions */ + stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple); + stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple); + + null_frac1 = stats1->stanullfrac; + null_frac2 = stats2->stanullfrac; + + /* Try to get fraction of empty ranges for the first variable */ + if (get_attstatsslot(&sslot, vardata1.statsTuple, + STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM, + InvalidOid, ATTSTATSSLOT_NUMBERS)) + { + if (sslot.nnumbers != 1) + elog(ERROR, "invalid empty fraction statistic"); + empty_frac1 = sslot.numbers[0]; + free_attstatsslot(&sslot); + } + else + { + empty_frac1 = 0.0; + } + + /* Try to get fraction of empty ranges for the second variable */ + if (get_attstatsslot(&sslot, vardata2.statsTuple, + STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM, + InvalidOid, ATTSTATSSLOT_NUMBERS)) + { + if (sslot.nnumbers != 1) + elog(ERROR, "invalid empty fraction statistic"); + empty_frac2 = sslot.numbers[0]; + free_attstatsslot(&sslot); + } + else + { + empty_frac2 = 0.0; + } + + /* Convert range histograms to separate lower/upper bound arrays */ + nhist1 = hist1.nvalues; + hist1_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist1); + hist1_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist1); + for (i = 0; i < nhist1; i++) + { + range_deserialize(rng_typcache, DatumGetRangeTypeP(hist1.values[i]), + &hist1_lower[i], &hist1_upper[i], &empty); + if (empty) + elog(ERROR, "bounds histogram contains an empty range"); + } + + nhist2 = hist2.nvalues; + hist2_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist2); + hist2_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist2); + for (i = 0; i < nhist2; i++) + { + range_deserialize(rng_typcache, 
DatumGetRangeTypeP(hist2.values[i]), + &hist2_lower[i], &hist2_upper[i], &empty); + if (empty) + elog(ERROR, "bounds histogram contains an empty range"); + } + + /* Estimate selectivity based on the operator */ + switch (operator) + { + case OID_RANGE_OVERLAPS_MULTIRANGE_OP: + case OID_MULTIRANGE_OVERLAPS_RANGE_OP: + case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP: + + /* + * A && B iff NOT(A << B) AND NOT(A >> B), so P(A && B) = + * 1 - P(A.upper < B.lower) - P(B.upper < A.lower) + * + * This decomposition is exact for single ranges. For + * multiranges, the bound histograms only represent the outermost + * lower and upper bounds (see multirange_typanalyze), so internal + * gaps are not captured. This can overestimate overlap for sparse + * multiranges, but is consistent with how existing restriction + * selectivity handles multirange &&. + */ + selec = 1; + selec -= calc_hist_join_selectivity(rng_typcache, + hist1_upper, nhist1, + hist2_lower, nhist2); + selec -= calc_hist_join_selectivity(rng_typcache, + hist2_upper, nhist2, + hist1_lower, nhist1); + break; + + case OID_RANGE_LEFT_MULTIRANGE_OP: + case OID_MULTIRANGE_LEFT_RANGE_OP: + case OID_MULTIRANGE_LEFT_MULTIRANGE_OP: + /* A << B iff upper(A) < lower(B) */ + selec = calc_hist_join_selectivity(rng_typcache, + hist1_upper, nhist1, + hist2_lower, nhist2); + break; + + case OID_RANGE_RIGHT_MULTIRANGE_OP: + case OID_MULTIRANGE_RIGHT_RANGE_OP: + case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP: + /* A >> B iff upper(B) < lower(A) */ + selec = calc_hist_join_selectivity(rng_typcache, + hist2_upper, nhist2, + hist1_lower, nhist1); + break; + + default: + /* Unsupported operator; keep the default selectivity */ + goto cleanup; + } + + /* The histogram-based selectivity applies to non-empty ranges only */ + selec *= (1 - empty_frac1) * (1 - empty_frac2); + + /* + * For the supported operators (<<, >>, &&), empty ranges always produce + * false, so no term needs to be added back for the empty fractions. 
+ */ + + /* All multirange operators are strict */ + selec *= (1 - null_frac1) * (1 - null_frac2); + +cleanup: + if (have_hist2) + free_attstatsslot(&hist2); + if (have_hist1) + free_attstatsslot(&hist1); + + ReleaseVariableStats(vardata1); + ReleaseVariableStats(vardata2); + + CLAMP_PROBABILITY(selec); + + PG_RETURN_FLOAT8((float8) selec); +} diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat index 5ea4434f9fa..28f696a9f41 100644 --- a/src/include/catalog/pg_operator.dat +++ b/src/include/catalog/pg_operator.dat @@ -3302,19 +3302,19 @@ oprname => '&&', oprleft => 'anyrange', oprright => 'anymultirange', oprresult => 'bool', oprcom => '&&(anymultirange,anyrange)', oprcode => 'range_overlaps_multirange', oprrest => 'multirangesel', - oprjoin => 'areajoinsel' }, + oprjoin => 'multirangejoinsel' }, { oid => '2867', oid_symbol => 'OID_MULTIRANGE_OVERLAPS_RANGE_OP', descr => 'overlaps', oprname => '&&', oprleft => 'anymultirange', oprright => 'anyrange', oprresult => 'bool', oprcom => '&&(anyrange,anymultirange)', oprcode => 'multirange_overlaps_range', oprrest => 'multirangesel', - oprjoin => 'areajoinsel' }, + oprjoin => 'multirangejoinsel' }, { oid => '2868', oid_symbol => 'OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP', descr => 'overlaps', oprname => '&&', oprleft => 'anymultirange', oprright => 'anymultirange', oprresult => 'bool', oprcom => '&&(anymultirange,anymultirange)', oprcode => 'multirange_overlaps_multirange', oprrest => 'multirangesel', - oprjoin => 'areajoinsel' }, + oprjoin => 'multirangejoinsel' }, { oid => '2869', oid_symbol => 'OID_MULTIRANGE_CONTAINS_ELEM_OP', descr => 'contains', oprname => '@>', oprleft => 'anymultirange', oprright => 'anyelement', @@ -3428,37 +3428,37 @@ oprname => '<<', oprleft => 'anyrange', oprright => 'anymultirange', oprresult => 'bool', oprcom => '>>(anymultirange,anyrange)', oprcode => 'range_before_multirange', oprrest => 'multirangesel', - oprjoin => 'scalarltjoinsel' }, + oprjoin => 
'multirangejoinsel' }, { oid => '4396', oid_symbol => 'OID_MULTIRANGE_LEFT_RANGE_OP', descr => 'is left of', oprname => '<<', oprleft => 'anymultirange', oprright => 'anyrange', oprresult => 'bool', oprcom => '>>(anyrange,anymultirange)', oprcode => 'multirange_before_range', oprrest => 'multirangesel', - oprjoin => 'scalarltjoinsel' }, + oprjoin => 'multirangejoinsel' }, { oid => '4397', oid_symbol => 'OID_MULTIRANGE_LEFT_MULTIRANGE_OP', descr => 'is left of', oprname => '<<', oprleft => 'anymultirange', oprright => 'anymultirange', oprresult => 'bool', oprcom => '>>(anymultirange,anymultirange)', oprcode => 'multirange_before_multirange', oprrest => 'multirangesel', - oprjoin => 'scalarltjoinsel' }, + oprjoin => 'multirangejoinsel' }, { oid => '4398', oid_symbol => 'OID_RANGE_RIGHT_MULTIRANGE_OP', descr => 'is right of', oprname => '>>', oprleft => 'anyrange', oprright => 'anymultirange', oprresult => 'bool', oprcom => '<<(anymultirange,anyrange)', oprcode => 'range_after_multirange', oprrest => 'multirangesel', - oprjoin => 'scalargtjoinsel' }, + oprjoin => 'multirangejoinsel' }, { oid => '4399', oid_symbol => 'OID_MULTIRANGE_RIGHT_RANGE_OP', descr => 'is right of', oprname => '>>', oprleft => 'anymultirange', oprright => 'anyrange', oprresult => 'bool', oprcom => '<<(anyrange,anymultirange)', oprcode => 'multirange_after_range', oprrest => 'multirangesel', - oprjoin => 'scalargtjoinsel' }, + oprjoin => 'multirangejoinsel' }, { oid => '4400', oid_symbol => 'OID_MULTIRANGE_RIGHT_MULTIRANGE_OP', descr => 'is right of', oprname => '>>', oprleft => 'anymultirange', oprright => 'anymultirange', oprresult => 'bool', oprcom => '<<(anymultirange,anymultirange)', oprcode => 'multirange_after_multirange', oprrest => 'multirangesel', - oprjoin => 'scalargtjoinsel' }, + oprjoin => 'multirangejoinsel' }, { oid => '8262', descr => 'equal', oprname => '=', oprcanmerge => 't', oprcanhash => 't', oprleft => 'oid8', diff --git a/src/include/catalog/pg_proc.dat 
b/src/include/catalog/pg_proc.dat index c6a707acae4..10fbc22c4a6 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -12923,4 +12923,8 @@ proname => 'rangejoinsel', provolatile => 's', prorettype => 'float8', proargtypes => 'internal oid internal int2 internal', prosrc => 'rangejoinsel' }, +{ oid => '8356', descr => 'join selectivity for multirange operators', + proname => 'multirangejoinsel', provolatile => 's', prorettype => 'float8', + proargtypes => 'internal oid internal int2 internal', + prosrc => 'multirangejoinsel' }, ] diff --git a/src/test/regress/expected/multirangetypes.out b/src/test/regress/expected/multirangetypes.out index f5e7df8df43..aab9c5e2604 100644 --- a/src/test/regress/expected/multirangetypes.out +++ b/src/test/regress/expected/multirangetypes.out @@ -3512,3 +3512,160 @@ create function mr_table_fail(i anyelement) returns table(i anyelement, r anymul as $$ select $1, '[1,10]' $$ language sql; ERROR: cannot determine result data type DETAIL: A result of type anymultirange requires at least one input of type anyrange or anymultirange. 
+-- Restore GUCs changed by earlier index tests +RESET enable_seqscan; +RESET enable_indexscan; +RESET enable_bitmapscan; +-- +-- test selectivity of multirange join operators +-- +create table test_mr_join_1 (mr1 int4multirange); +create table test_mr_join_2 (mr2 int4multirange); +create table test_mr_join_3 (mr3 int4multirange); +insert into test_mr_join_1 select int4multirange(int4range(g, g+10)) from generate_series(1, 1000) g; +insert into test_mr_join_1 select int4multirange(int4range(g, g+100)) from generate_series(1, 1000, 10) g; +insert into test_mr_join_2 select int4multirange(int4range(g, g+10)) from generate_series(1, 500) g; +insert into test_mr_join_2 select int4multirange(int4range(g, g+100)) from generate_series(1, 500, 10) g; +insert into test_mr_join_3 select int4multirange(int4range(g, g+10)) from generate_series(501, 1000) g; +insert into test_mr_join_3 select int4multirange(int4range(g, g+100)) from generate_series(501, 1000, 10) g; +analyze test_mr_join_1; +analyze test_mr_join_2; +analyze test_mr_join_3; +-- multirange vs multirange: reorder joins based on computed selectivity +explain (costs off) select count(*) from test_mr_join_1, test_mr_join_2, test_mr_join_3 where mr1 && mr2 and mr2 && mr3; + QUERY PLAN +----------------------------------------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (test_mr_join_1.mr1 && test_mr_join_2.mr2) + -> Seq Scan on test_mr_join_1 + -> Materialize + -> Nested Loop + Join Filter: (test_mr_join_2.mr2 && test_mr_join_3.mr3) + -> Seq Scan on test_mr_join_2 + -> Materialize + -> Seq Scan on test_mr_join_3 +(10 rows) + +explain (costs off) select count(*) from test_mr_join_1, test_mr_join_2, test_mr_join_3 where mr1 << mr2 and mr2 << mr3; + QUERY PLAN +----------------------------------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (test_mr_join_2.mr2 << test_mr_join_3.mr3) + -> Nested Loop + Join Filter: (test_mr_join_1.mr1 << 
test_mr_join_2.mr2) + -> Seq Scan on test_mr_join_1 + -> Materialize + -> Seq Scan on test_mr_join_2 + -> Materialize + -> Seq Scan on test_mr_join_3 +(10 rows) + +explain (costs off) select count(*) from test_mr_join_1, test_mr_join_2, test_mr_join_3 where mr1 >> mr2 and mr2 >> mr3; + QUERY PLAN +----------------------------------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (test_mr_join_1.mr1 >> test_mr_join_2.mr2) + -> Nested Loop + Join Filter: (test_mr_join_2.mr2 >> test_mr_join_3.mr3) + -> Seq Scan on test_mr_join_2 + -> Materialize + -> Seq Scan on test_mr_join_3 + -> Seq Scan on test_mr_join_1 +(9 rows) + +drop table test_mr_join_1; +drop table test_mr_join_2; +drop table test_mr_join_3; +-- +-- test multirange join selectivity with fully disjoint histograms +-- +create table test_mr_join_lo (r int4multirange); +create table test_mr_join_hi (r int4multirange); +insert into test_mr_join_lo select int4multirange(int4range(g, g+10)) from generate_series(1, 500) g; +insert into test_mr_join_hi select int4multirange(int4range(g, g+10)) from generate_series(10001, 10500) g; +analyze test_mr_join_lo; +analyze test_mr_join_hi; +-- These should not crash and should produce stable plans. 
+explain (costs off) select count(*) from test_mr_join_lo a, test_mr_join_hi b where a.r << b.r; + QUERY PLAN +------------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (a.r << b.r) + -> Seq Scan on test_mr_join_lo a + -> Materialize + -> Seq Scan on test_mr_join_hi b +(6 rows) + +explain (costs off) select count(*) from test_mr_join_lo a, test_mr_join_hi b where a.r >> b.r; + QUERY PLAN +------------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (a.r >> b.r) + -> Seq Scan on test_mr_join_lo a + -> Materialize + -> Seq Scan on test_mr_join_hi b +(6 rows) + +explain (costs off) select count(*) from test_mr_join_lo a, test_mr_join_hi b where a.r && b.r; + QUERY PLAN +------------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (a.r && b.r) + -> Seq Scan on test_mr_join_lo a + -> Materialize + -> Seq Scan on test_mr_join_hi b +(6 rows) + +drop table test_mr_join_lo; +drop table test_mr_join_hi; +-- +-- test range vs multirange join selectivity +-- +create table test_mr_join_r (r int4range); +create table test_mr_join_mr (mr int4multirange); +insert into test_mr_join_r select int4range(g, g+10) from generate_series(1, 500) g; +insert into test_mr_join_mr select int4multirange(int4range(g, g+10)) from generate_series(10001, 10500) g; +analyze test_mr_join_r; +analyze test_mr_join_mr; +-- range vs multirange operators should use multirangejoinsel +explain (costs off) select count(*) from test_mr_join_r a, test_mr_join_mr b where a.r << b.mr; + QUERY PLAN +------------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (a.r << b.mr) + -> Seq Scan on test_mr_join_r a + -> Materialize + -> Seq Scan on test_mr_join_mr b +(6 rows) + +explain (costs off) select count(*) from test_mr_join_r a, test_mr_join_mr b where a.r >> b.mr; + QUERY PLAN +------------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (a.r >> 
b.mr) + -> Seq Scan on test_mr_join_r a + -> Materialize + -> Seq Scan on test_mr_join_mr b +(6 rows) + +explain (costs off) select count(*) from test_mr_join_r a, test_mr_join_mr b where a.r && b.mr; + QUERY PLAN +------------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (a.r && b.mr) + -> Seq Scan on test_mr_join_r a + -> Materialize + -> Seq Scan on test_mr_join_mr b +(6 rows) + +drop table test_mr_join_r; +drop table test_mr_join_mr; diff --git a/src/test/regress/sql/multirangetypes.sql b/src/test/regress/sql/multirangetypes.sql index 112334b03eb..e3f8cd6f4e3 100644 --- a/src/test/regress/sql/multirangetypes.sql +++ b/src/test/regress/sql/multirangetypes.sql @@ -904,3 +904,75 @@ create function mr_inoutparam_fail(inout i anyelement, out r anymultirange) --should fail create function mr_table_fail(i anyelement) returns table(i anyelement, r anymultirange) as $$ select $1, '[1,10]' $$ language sql; + +-- Restore GUCs changed by earlier index tests +RESET enable_seqscan; +RESET enable_indexscan; +RESET enable_bitmapscan; + +-- +-- test selectivity of multirange join operators +-- +create table test_mr_join_1 (mr1 int4multirange); +create table test_mr_join_2 (mr2 int4multirange); +create table test_mr_join_3 (mr3 int4multirange); + +insert into test_mr_join_1 select int4multirange(int4range(g, g+10)) from generate_series(1, 1000) g; +insert into test_mr_join_1 select int4multirange(int4range(g, g+100)) from generate_series(1, 1000, 10) g; +insert into test_mr_join_2 select int4multirange(int4range(g, g+10)) from generate_series(1, 500) g; +insert into test_mr_join_2 select int4multirange(int4range(g, g+100)) from generate_series(1, 500, 10) g; +insert into test_mr_join_3 select int4multirange(int4range(g, g+10)) from generate_series(501, 1000) g; +insert into test_mr_join_3 select int4multirange(int4range(g, g+100)) from generate_series(501, 1000, 10) g; + +analyze test_mr_join_1; +analyze test_mr_join_2; +analyze 
test_mr_join_3; + +-- multirange vs multirange: reorder joins based on computed selectivity +explain (costs off) select count(*) from test_mr_join_1, test_mr_join_2, test_mr_join_3 where mr1 && mr2 and mr2 && mr3; +explain (costs off) select count(*) from test_mr_join_1, test_mr_join_2, test_mr_join_3 where mr1 << mr2 and mr2 << mr3; +explain (costs off) select count(*) from test_mr_join_1, test_mr_join_2, test_mr_join_3 where mr1 >> mr2 and mr2 >> mr3; + +drop table test_mr_join_1; +drop table test_mr_join_2; +drop table test_mr_join_3; + +-- +-- test multirange join selectivity with fully disjoint histograms +-- +create table test_mr_join_lo (r int4multirange); +create table test_mr_join_hi (r int4multirange); + +insert into test_mr_join_lo select int4multirange(int4range(g, g+10)) from generate_series(1, 500) g; +insert into test_mr_join_hi select int4multirange(int4range(g, g+10)) from generate_series(10001, 10500) g; + +analyze test_mr_join_lo; +analyze test_mr_join_hi; + +-- These should not crash and should produce stable plans. 
+explain (costs off) select count(*) from test_mr_join_lo a, test_mr_join_hi b where a.r << b.r; +explain (costs off) select count(*) from test_mr_join_lo a, test_mr_join_hi b where a.r >> b.r; +explain (costs off) select count(*) from test_mr_join_lo a, test_mr_join_hi b where a.r && b.r; + +drop table test_mr_join_lo; +drop table test_mr_join_hi; + +-- +-- test range vs multirange join selectivity +-- +create table test_mr_join_r (r int4range); +create table test_mr_join_mr (mr int4multirange); + +insert into test_mr_join_r select int4range(g, g+10) from generate_series(1, 500) g; +insert into test_mr_join_mr select int4multirange(int4range(g, g+10)) from generate_series(10001, 10500) g; + +analyze test_mr_join_r; +analyze test_mr_join_mr; + +-- range vs multirange operators should use multirangejoinsel +explain (costs off) select count(*) from test_mr_join_r a, test_mr_join_mr b where a.r << b.mr; +explain (costs off) select count(*) from test_mr_join_r a, test_mr_join_mr b where a.r >> b.mr; +explain (costs off) select count(*) from test_mr_join_r a, test_mr_join_mr b where a.r && b.mr; + +drop table test_mr_join_r; +drop table test_mr_join_mr; -- 2.50.1 (Apple Git-155)