public inbox for [email protected]
help / color / mirror / Atom feedFrom: =?utf-8?B?Q2hlbmh1aU1v?= <[email protected]>
To: =?utf-8?B?cGdzcWwtaGFja2Vycw==?= <[email protected]>
Subject: [PATCH] Optimize numeric comparisons and aggregations via packed-datum extraction
Date: Sat, 4 Apr 2026 23:29:06 +0800
Message-ID: <[email protected]> (raw)
Hi,
While profiling the execution of MAX(), MIN(), SUM(), AVG(), and VARIANCE() on numeric columns, I noticed that a significant amount of CPU time is spent on heap allocations and varlena detoasting. Specifically, PG_GETARG_NUMERIC() unconditionally detoasts short-header datums, which incurs continuous palloc/memcpy overhead in tight aggregation loops.
This patch introduces a fast-path optimization by directly extracting numeric fields from packed datums.
The key modifications include:
1. cmp_numerics_packed(): A new comparison function that reads the `n_header` and payload directly from `VARDATA_ANY()` using pointer arithmetic. This avoids the standard `NUMERIC_*` macros which assume a 4-byte header structure, allowing us to safely compare 1-byte header numerics in-place.
2. Zero-copy MIN/MAX: In `numeric_smaller` and `numeric_larger`, instead of unpacking inputs to `Numeric` and repacking them for return, the patch uses `PG_DETOAST_DATUM_PACKED` and directly returns the original `Datum` using `PG_RETURN_DATUM(PG_GETARG_DATUM(X))`.
3. init_var_from_packed(): Extended the same direct-extraction logic to `do_numeric_accum` and `do_numeric_discard` for SUM, AVG, and VARIANCE.
I ran the built-in `numeric.sql` regression tests, and all passed without issues.
Here are the benchmark results executing on 20M rows using different precisions (NUMERIC(18,2), NUMERIC(38,2), NUMERIC(9,2)):
The DDLs and DMLs are:
drop table if exists t;
create table t(id bigserial primary key, order_date date, amount numeric(18, 2));
insert into t(order_date, amount) select
DATE '2024-01-01' + (gs % 730),
gs::numeric(18, 2) * gs % 998244353 * gs % 1000000007 * 133.31
from
generate_series(1, 20000000) as gs;
vacuum analyze t;
drop table if exists t_big;
create table t_big(id bigserial primary key, order_date date, amount numeric(38, 2));
insert into t_big(order_date, amount) select
DATE '2024-01-01' + (gs % 730),
gs::numeric(38, 2) * gs * gs * 133.31
from
generate_series(1, 20000000) as gs;
vacuum analyze t_big;
drop table if exists t_small;
create table t_small(id bigserial primary key, order_date date, amount numeric(9, 2));
insert into t_small(order_date, amount) select
DATE '2024-01-01' + (gs % 730),
(gs % 1331) * (gs % 1331) * 1.31
from
generate_series(1, 20000000) as gs;
vacuum analyze t_small;
and the TEST CASEs are:
elect max(amount) from t;
select max(amount) from t where order_date > '2025-01-01';
select order_date, max(amount) from t group by order_date;
select order_date, max(amount) from t where order_date > '2025-01-01' group by order_date;
select min(amount) from t;
select min(amount) from t where order_date > '2025-01-01';
select order_date, min(amount) from t group by order_date;
select order_date, min(amount) from t where order_date > '2025-01-01' group by order_date;
select sum(amount) from t;
select sum(amount) from t where order_date > '2025-01-01';
select order_date, sum(amount) from t group by order_date;
select order_date, sum(amount) from t where order_date > '2025-01-01' group by order_date;
select avg(amount) from t;
select avg(amount) from t where order_date > '2025-01-01';
select order_date, avg(amount) from t group by order_date;
select order_date, avg(amount) from t where order_date > '2025-01-01' group by order_date;
select variance(amount) from t;
select variance(amount) from t where order_date > '2025-01-01';
select order_date, variance(amount) from t group by order_date;
select order_date, variance(amount) from t where order_date > '2025-01-01' group by order_date;
select max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t;
select max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t where order_date > '2025-01-01';
select order_date, max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t group by order_date;
select order_date, max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t where order_date > '2025-01-01' group by order_date;
select max(amount) from t_big;
select max(amount) from t_big where order_date > '2025-01-01';
select order_date, max(amount) from t_big group by order_date;
select order_date, max(amount) from t_big where order_date > '2025-01-01' group by order_date;
select min(amount) from t_big;
select min(amount) from t_big where order_date > '2025-01-01';
select order_date, min(amount) from t_big group by order_date;
select order_date, min(amount) from t_big where order_date > '2025-01-01' group by order_date;
select sum(amount) from t_big;
select sum(amount) from t_big where order_date > '2025-01-01';
select order_date, sum(amount) from t_big group by order_date;
select order_date, sum(amount) from t_big where order_date > '2025-01-01' group by order_date;
select avg(amount) from t_big;
select avg(amount) from t_big where order_date > '2025-01-01';
select order_date, avg(amount) from t_big group by order_date;
select order_date, avg(amount) from t_big where order_date > '2025-01-01' group by order_date;
select variance(amount) from t_big;
select variance(amount) from t_big where order_date > '2025-01-01';
select order_date, variance(amount) from t_big group by order_date;
select order_date, variance(amount) from t_big where order_date > '2025-01-01' group by order_date;
select max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t_big;
select max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t_big where order_date > '2025-01-01';
select order_date, max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t_big group by order_date;
select order_date, max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t_big where order_date > '2025-01-01' group by order_date;
select max(amount) from t_small;
select max(amount) from t_small where order_date > '2025-01-01';
select order_date, max(amount) from t_small group by order_date;
select order_date, max(amount) from t_small where order_date > '2025-01-01' group by order_date;
select min(amount) from t_small;
select min(amount) from t_small where order_date > '2025-01-01';
select order_date, min(amount) from t_small group by order_date;
select order_date, min(amount) from t_small where order_date > '2025-01-01' group by order_date;
select sum(amount) from t_small;
select sum(amount) from t_small where order_date > '2025-01-01';
select order_date, sum(amount) from t_small group by order_date;
select order_date, sum(amount) from t_small where order_date > '2025-01-01' group by order_date;
select avg(amount) from t_small;
select avg(amount) from t_small where order_date > '2025-01-01';
select order_date, avg(amount) from t_small group by order_date;
select order_date, avg(amount) from t_small where order_date > '2025-01-01' group by order_date;
select variance(amount) from t_small;
select variance(amount) from t_small where order_date > '2025-01-01';
select order_date, variance(amount) from t_small group by order_date;
select order_date, variance(amount) from t_small where order_date > '2025-01-01' group by order_date;
select max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t_small;
select max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t_small where order_date > '2025-01-01';
select order_date, max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t_small group by order_date;
select order_date, max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t_small where order_date > '2025-01-01' group by order_date;
And I ran the test cases on my machine (12th Gen Intel(R) Core(TM) i9-12900H (2.50 GHz), 64GB RAM), better performance shows below:
# ====== DATASET: t (20M rows, NUMERIC(18,2)) ======
# ---- Baseline (All optimizations reverted) ----
# MAX queries
select max(amount) from t; Time: 980.844 ms
select max(amount) from t where order_date > '2025-01-01'; Time: 406.842 ms
select order_date, max(amount) from t group by order_date; Time: 952.209 ms
select order_date, max(amount) from t where order_date > '2025-01-01' group by ...; Time: 571.053 ms
# MIN queries
select min(amount) from t; Time: 567.840 ms
select min(amount) from t where order_date > '2025-01-01'; Time: 405.364 ms
select order_date, min(amount) from t group by order_date; Time: 935.274 ms
select order_date, min(amount) from t where order_date > '2025-01-01' group by ...; Time: 546.571 ms
# SUM queries
select sum(amount) from t; Time: 562.059 ms
select sum(amount) from t where order_date > '2025-01-01'; Time: 400.519 ms
select order_date, sum(amount) from t group by order_date; Time: 920.640 ms
select order_date, sum(amount) from t where order_date > '2025-01-01' group by ...; Time: 543.760 ms
# AVG queries
select avg(amount) from t; Time: 566.719 ms
select avg(amount) from t where order_date > '2025-01-01'; Time: 404.249 ms
select order_date, avg(amount) from t group by order_date; Time: 922.547 ms
select order_date, avg(amount) from t where order_date > '2025-01-01' group by ...; Time: 534.897 ms
# VARIANCE queries
select variance(amount) from t; Time: 699.547 ms
select variance(amount) from t where order_date > '2025-01-01'; Time: 479.679 ms
select order_date, variance(amount) from t group by order_date; Time: 1081.712 ms
select order_date, variance(amount) from t where order_date > '2025-01-01' group by Time: 620.472 ms
# COMBINED queries
select max, min, sum, avg, variance from t; Time: 1686.727 ms
select max, min, sum, avg, variance from t where order_date > '2025-01-01'; Time: 971.959 ms
select order_date, max, min, sum, avg, variance from t group by order_date; Time: 2206.506 ms
select order_date, max, min, sum... from t where order_date > '2025-01-01'... Time: 1155.119 ms
# ---- Optimized (packed-datum extraction for MAX, MIN, SUM, AVG, VARIANCE) ----
# MAX queries
select max(amount) from t; Time: 297.890 ms (-70%)
select max(amount) from t where order_date > '2025-01-01'; Time: 261.415 ms (-36%)
select order_date, max(amount) from t group by order_date; Time: 702.121 ms (-26%)
select order_date, max(amount) from t where order_date > '2025-01-01' group by ...; Time: 407.533 ms (-29%)
# MIN queries
select min(amount) from t; Time: 301.846 ms (-47%)
select min(amount) from t where order_date > '2025-01-01'; Time: 266.653 ms (-34%)
select order_date, min(amount) from t group by order_date; Time: 681.805 ms (-27%)
select order_date, min(amount) from t where order_date > '2025-01-01' group by ...; Time: 413.830 ms (-24%)
# SUM queries
select sum(amount) from t; Time: 300.850 ms (-46%)
select sum(amount) from t where order_date > '2025-01-01'; Time: 282.615 ms (-29%)
select order_date, sum(amount) from t group by order_date; Time: 765.818 ms (-17%)
select order_date, sum(amount) from t where order_date > '2025-01-01' group by ...; Time: 463.414 ms (-15%)
# AVG queries
select avg(amount) from t; Time: 327.844 ms (-42%)
select avg(amount) from t where order_date > '2025-01-01'; Time: 365.507 ms (-10%)
select order_date, avg(amount) from t group by order_date; Time: 650.977 ms (-29%)
select order_date, avg(amount) from t where order_date > '2025-01-01' group by ...; Time: 430.043 ms (-20%)
# VARIANCE queries
select variance(amount) from t; Time: 505.815 ms (-28%)
select variance(amount) from t where order_date > '2025-01-01'; Time: 403.832 ms (-16%)
select order_date, variance(amount) from t group by order_date; Time: 901.068 ms (-17%)
select order_date, variance(amount) from t where order_date > '2025-01-01' group by Time: 561.622 ms (-9%)
# COMBINED queries
select max, min, sum, avg, variance from t; Time: 804.479 ms (-52%)
select max, min, sum, avg, variance from t where order_date > '2025-01-01'; Time: 530.819 ms (-45%)
select order_date, max, min, sum, avg, variance from t group by order_date; Time: 1241.194 ms (-44%)
select order_date, max, min, sum... from t where order_date > '2025-01-01'... Time: 741.431 ms (-36%)
# ====== DATASET: t_big (20M rows, NUMERIC(38,2)) ======
# ---- Baseline (All optimizations reverted) ----
# MAX queries
select max(amount) from t_big; Time: 1143.060 ms
select max(amount) from t... where order_date > '2025-01-01'; Time: 462.994 ms
select order_date, max(amount) from t_big group by order_date; Time: 1032.899 ms
select order_date, max... from t... where order_date > '2025-01-01' group by ...; Time: 556.832 ms
# MIN queries
select min(amount) from t_big; Time: 534.970 ms
select min(amount) from t... where order_date > '2025-01-01'; Time: 412.798 ms
select order_date, min(amount) from t_big group by order_date; Time: 923.593 ms
select order_date, min... from t... where order_date > '2025-01-01' group by ...; Time: 565.523 ms
# SUM queries
select sum(amount) from t_big; Time: 611.950 ms
select sum(amount) from t... where order_date > '2025-01-01'; Time: 476.377 ms
select order_date, sum(amount) from t_big group by order_date; Time: 1009.434 ms
select order_date, sum... from t... where order_date > '2025-01-01' group by ...; Time: 589.432 ms
# AVG queries
select avg(amount) from t_big; Time: 601.082 ms
select avg(amount) from t... where order_date > '2025-01-01'; Time: 450.408 ms
select order_date, avg(amount) from t_big group by order_date; Time: 1038.965 ms
select order_date, avg... from t... where order_date > '2025-01-01' group by ...; Time: 670.555 ms
# VARIANCE queries
select variance(amount) from t_big; Time: 1019.580 ms
select variance(amount) from t... where order_date > '2025-01-01'; Time: 658.554 ms
select order_date, variance(amount) from t_big group by order_date; Time: 1366.489 ms
select order_date, variance... from t... where order_date > '2025-01-01' group by Time: 766.608 ms
# COMBINED queries
select max, min, sum, avg, variance from t_big; Time: 1797.785 ms
select max, min, sum, avg, variance from t... where order_date > '2025-01-01'; Time: 1064.796 ms
select order_date, max, min, sum, avg, variance from t_big group by order_date; Time: 2254.985 ms
select order_date, max, min, sum... from t... where order_date > '2025-01-01'... Time: 1189.765 ms
# ---- Optimized (packed-datum extraction for MAX, MIN, SUM, AVG, VARIANCE) ----
# MAX queries
select max(amount) from t_big; Time: 457.682 ms (-60%)
select max(amount) from t... where order_date > '2025-01-01'; Time: 342.199 ms (-26%)
select order_date, max(amount) from t_big group by order_date; Time: 777.064 ms (-25%)
select order_date, max... from t... where order_date > '2025-01-01' group by ...; Time: 463.163 ms (-17%)
# MIN queries
select min(amount) from t_big; Time: 328.795 ms (-39%)
select min(amount) from t... where order_date > '2025-01-01'; Time: 305.936 ms (-26%)
select order_date, min(amount) from t_big group by order_date; Time: 693.899 ms (-25%)
select order_date, min... from t... where order_date > '2025-01-01' group by ...; Time: 456.664 ms (-19%)
# SUM queries
select sum(amount) from t_big; Time: 329.647 ms (-46%)
select sum(amount) from t... where order_date > '2025-01-01'; Time: 311.748 ms (-35%)
select order_date, sum(amount) from t_big group by order_date; Time: 703.230 ms (-30%)
select order_date, sum... from t... where order_date > '2025-01-01' group by ...; Time: 473.354 ms (-20%)
# AVG queries
select avg(amount) from t_big; Time: 357.884 ms (-40%)
select avg(amount) from t... where order_date > '2025-01-01'; Time: 381.753 ms (-15%)
select order_date, avg(amount) from t_big group by order_date; Time: 707.939 ms (-32%)
select order_date, avg... from t... where order_date > '2025-01-01' group by ...; Time: 489.593 ms (-27%)
# VARIANCE queries
select variance(amount) from t_big; Time: 723.205 ms (-29%)
select variance(amount) from t... where order_date > '2025-01-01'; Time: 481.065 ms (-27%)
select order_date, variance(amount) from t_big group by order_date; Time: 1273.396 ms (-7%)
select order_date, variance... from t... where order_date > '2025-01-01' group by Time: 692.473 ms (-10%)
# COMBINED queries
select max, min, sum, avg, variance from t_big; Time: 1144.183 ms (-36%)
select max, min, sum, avg, variance from t... where order_date > '2025-01-01'; Time: 735.677 ms (-31%)
select order_date, max, min, sum, avg, variance from t_big group by order_date; Time: 1603.737 ms (-29%)
select order_date, max, min, sum... from t... where order_date > '2025-01-01'... Time: 869.972 ms (-27%)
# ====== DATASET: t_small (20M rows, NUMERIC(9,2)) ======
# ---- Baseline (All optimizations reverted) ----
# MAX queries
select max(amount) from t_small; Time: 919.394 ms
select max(amount) from t... where order_date > '2025-01-01'; Time: 414.951 ms
select order_date, max(amount) from t_small group by order_date; Time: 939.192 ms
select order_date, max... from t... where order_date > '2025-01-01' group by ...; Time: 548.936 ms
# MIN queries
select min(amount) from t_small; Time: 566.213 ms
select min(amount) from t... where order_date > '2025-01-01'; Time: 468.580 ms
select order_date, min(amount) from t_small group by order_date; Time: 973.943 ms
select order_date, min... from t... where order_date > '2025-01-01' group by ...; Time: 536.868 ms
# SUM queries
select sum(amount) from t_small; Time: 549.539 ms
select sum(amount) from t... where order_date > '2025-01-01'; Time: 405.480 ms
select order_date, sum(amount) from t_small group by order_date; Time: 970.394 ms
select order_date, sum... from t... where order_date > '2025-01-01' group by ...; Time: 521.120 ms
# AVG queries
select avg(amount) from t_small; Time: 557.897 ms
select avg(amount) from t... where order_date > '2025-01-01'; Time: 417.120 ms
select order_date, avg(amount) from t_small group by order_date; Time: 937.021 ms
select order_date, avg... from t... where order_date > '2025-01-01' group by ...; Time: 552.019 ms
# VARIANCE queries
select variance(amount) from t_small; Time: 655.459 ms
select variance(amount) from t... where order_date > '2025-01-01'; Time: 494.058 ms
select order_date, variance(amount) from t_small group by order_date; Time: 1051.335 ms
select order_date, variance... from t... where order_date > '2025-01-01' group by Time: 606.667 ms
# COMBINED queries
select max, min, sum, avg, variance from t_small; Time: 1489.296 ms
select max, min, sum, avg, variance from t... where order_date > '2025-01-01'; Time: 892.727 ms
select order_date, max, min, sum, avg, variance from t_small group by order_date; Time: 1940.004 ms
select order_date, max, min, sum... from t... where order_date > '2025-01-01'... Time: 1041.348 ms
# ---- Optimized (packed-datum extraction for MAX, MIN, SUM, AVG, VARIANCE) ----
# MAX queries
select max(amount) from t_small; Time: 332.218 ms (-64%)
select max(amount) from t... where order_date > '2025-01-01'; Time: 293.562 ms (-29%)
select order_date, max(amount) from t_small group by order_date; Time: 670.589 ms (-29%)
select order_date, max... from t... where order_date > '2025-01-01' group by ...; Time: 485.264 ms (-12%)
# MIN queries
select min(amount) from t_small; Time: 303.559 ms (-46%)
select min(amount) from t... where order_date > '2025-01-01'; Time: 330.970 ms (-29%)
select order_date, min(amount) from t_small group by order_date; Time: 726.441 ms (-25%)
select order_date, min... from t... where order_date > '2025-01-01' group by ...; Time: 455.427 ms (-15%)
# SUM queries
select sum(amount) from t_small; Time: 331.040 ms (-40%)
select sum(amount) from t... where order_date > '2025-01-01'; Time: 354.306 ms (-13%)
select order_date, sum(amount) from t_small group by order_date; Time: 731.451 ms (-25%)
select order_date, sum... from t... where order_date > '2025-01-01' group by ...; Time: 479.442 ms (-8%)
# AVG queries
select avg(amount) from t_small; Time: 397.363 ms (-29%)
select avg(amount) from t... where order_date > '2025-01-01'; Time: 325.047 ms (-22%)
select order_date, avg(amount) from t_small group by order_date; Time: 750.833 ms (-20%)
select order_date, avg... from t... where order_date > '2025-01-01' group by ...; Time: 547.354 ms (-1%)
# VARIANCE queries
select variance(amount) from t_small; Time: 504.897 ms (-23%)
select variance(amount) from t... where order_date > '2025-01-01'; Time: 405.917 ms (-18%)
select order_date, variance(amount) from t_small group by order_date; Time: 871.387 ms (-17%)
select order_date, variance... from t... where order_date > '2025-01-01' group by Time: 542.256 ms (-11%)
# COMBINED queries
select max, min, sum, avg, variance from t_small; Time: 753.569 ms (-49%)
select max, min, sum, avg, variance from t... where order_date > '2025-01-01'; Time: 524.862 ms (-41%)
select order_date, max, min, sum, avg, variance from t_small group by order_date; Time: 1228.248 ms (-37%)
select order_date, max, min, sum... from t... where order_date > '2025-01-01'... Time: 724.252 ms (-30%)
Feedback and review welcome.
--
Regards, Chenhui Mo,
pgEdge
Attachments:
[application/octet-stream] 0001-numeric_opt.patch (10.7K, 3-0001-numeric_opt.patch)
download | inline diff:
From 7962b52586fa78f4018305ea5aebc60be3586acd Mon Sep 17 00:00:00 2001
From: sohardforaname <[email protected]>
Date: Sat, 4 Apr 2026 22:17:44 +0800
Subject: [PATCH] numeric_opt
---
src/backend/utils/adt/numeric.c | 250 ++++++++++++++++++++++++++++----
1 file changed, 222 insertions(+), 28 deletions(-)
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index cb23dfe9b95..17d089a661b 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -2519,6 +2519,156 @@ numeric_le(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(result);
}
+/*
+ * cmp_numerics_packed() -
+ *
+ * Compare two packed Numeric varlenas without detoasting short-header
+ * datums. This avoids palloc/memcpy overhead for the common case of
+ * 1-byte varlena headers (small numerics on heap pages).
+ *
+ * The key insight: VARDATA_ANY() returns a pointer to the start of the
+ * NumericChoice data regardless of whether the varlena has a 1-byte or
+ * 4-byte header. We read n_header from there and extract all needed
+ * fields using pointer arithmetic rather than the standard NUMERIC_*
+ * macros (which assume a 4-byte varlena header via the Numeric struct).
+ */
+static int
+cmp_numerics_packed(Numeric num1, Numeric num2)
+{
+ uint16 header1;
+ uint16 header2;
+ char *data1;
+ char *data2;
+ int result;
+
+ /*
+ * Get pointers to the NumericChoice data, which starts right after the
+ * varlena header (1 or 4 bytes).
+ */
+ data1 = VARDATA_ANY(num1);
+ data2 = VARDATA_ANY(num2);
+
+ /*
+ * Read the n_header words. We must use memcpy because data1/data2 may
+ * be unaligned (when the varlena has a 1-byte header, the data starts at
+ * an odd offset).
+ */
+ memcpy(&header1, data1, sizeof(uint16));
+ memcpy(&header2, data2, sizeof(uint16));
+
+ /* Handle special values (NaN, Inf) — same logic as cmp_numerics */
+ if ((header1 & NUMERIC_SIGN_MASK) == NUMERIC_SPECIAL)
+ {
+ if (header1 == NUMERIC_NAN)
+ {
+ if (header2 == NUMERIC_NAN)
+ result = 0; /* NAN = NAN */
+ else
+ result = 1; /* NAN > non-NAN */
+ }
+ else if (header1 == NUMERIC_PINF)
+ {
+ if (header2 == NUMERIC_NAN)
+ result = -1; /* PINF < NAN */
+ else if (header2 == NUMERIC_PINF)
+ result = 0; /* PINF = PINF */
+ else
+ result = 1; /* PINF > anything else */
+ }
+ else /* num1 must be NINF */
+ {
+ if (header2 == NUMERIC_NINF)
+ result = 0; /* NINF = NINF */
+ else
+ result = -1; /* NINF < anything else */
+ }
+ }
+ else if ((header2 & NUMERIC_SIGN_MASK) == NUMERIC_SPECIAL)
+ {
+ if (header2 == NUMERIC_NINF)
+ result = 1; /* normal > NINF */
+ else
+ result = -1; /* normal < NAN or PINF */
+ }
+ else
+ {
+ /*
+ * Both are regular numerics. Extract fields from the raw data.
+ *
+ * For short-format numerics (header & 0x8000 != 0):
+ * - n_header is 2 bytes, digits follow immediately
+ * - sign is encoded in bit 0x2000
+ * - weight is in low 7 bits with sign extension
+ *
+ * For long-format numerics (header & 0x8000 == 0):
+ * - n_sign_dscale is 2 bytes, then n_weight is 2 bytes, then digits
+ * - sign is in high 2 bits of n_sign_dscale
+ */
+ int sign1,
+ sign2;
+ int weight1,
+ weight2;
+ NumericDigit *digits1,
+ *digits2;
+ int ndigits1,
+ ndigits2;
+ int data_len1,
+ data_len2;
+
+ data_len1 = (int) VARSIZE_ANY_EXHDR(num1);
+ data_len2 = (int) VARSIZE_ANY_EXHDR(num2);
+
+ if (header1 & 0x8000)
+ {
+ /* Short format */
+ sign1 = (header1 & NUMERIC_SHORT_SIGN_MASK) ? NUMERIC_NEG : NUMERIC_POS;
+ weight1 = (header1 & NUMERIC_SHORT_WEIGHT_SIGN_MASK ?
+ ~NUMERIC_SHORT_WEIGHT_MASK : 0) |
+ (header1 & NUMERIC_SHORT_WEIGHT_MASK);
+ digits1 = (NumericDigit *) (data1 + sizeof(uint16));
+ ndigits1 = (data_len1 - (int) sizeof(uint16)) / (int) sizeof(NumericDigit);
+ }
+ else
+ {
+ /* Long format */
+ int16 n_weight1;
+
+ sign1 = header1 & NUMERIC_SIGN_MASK;
+ memcpy(&n_weight1, data1 + sizeof(uint16), sizeof(int16));
+ weight1 = n_weight1;
+ digits1 = (NumericDigit *) (data1 + sizeof(uint16) + sizeof(int16));
+ ndigits1 = (data_len1 - (int) sizeof(uint16) - (int) sizeof(int16)) / (int) sizeof(NumericDigit);
+ }
+
+ if (header2 & 0x8000)
+ {
+ /* Short format */
+ sign2 = (header2 & NUMERIC_SHORT_SIGN_MASK) ? NUMERIC_NEG : NUMERIC_POS;
+ weight2 = (header2 & NUMERIC_SHORT_WEIGHT_SIGN_MASK ?
+ ~NUMERIC_SHORT_WEIGHT_MASK : 0) |
+ (header2 & NUMERIC_SHORT_WEIGHT_MASK);
+ digits2 = (NumericDigit *) (data2 + sizeof(uint16));
+ ndigits2 = (data_len2 - (int) sizeof(uint16)) / (int) sizeof(NumericDigit);
+ }
+ else
+ {
+ /* Long format */
+ int16 n_weight2;
+
+ sign2 = header2 & NUMERIC_SIGN_MASK;
+ memcpy(&n_weight2, data2 + sizeof(uint16), sizeof(int16));
+ weight2 = n_weight2;
+ digits2 = (NumericDigit *) (data2 + sizeof(uint16) + sizeof(int16));
+ ndigits2 = (data_len2 - (int) sizeof(uint16) - (int) sizeof(int16)) / (int) sizeof(NumericDigit);
+ }
+
+ result = cmp_var_common(digits1, ndigits1, weight1, sign1,
+ digits2, ndigits2, weight2, sign2);
+ }
+
+ return result;
+}
+
static int
cmp_numerics(Numeric num1, Numeric num2)
{
@@ -3450,17 +3600,18 @@ numeric_inc(PG_FUNCTION_ARGS)
Datum
numeric_smaller(PG_FUNCTION_ARGS)
{
- Numeric num1 = PG_GETARG_NUMERIC(0);
- Numeric num2 = PG_GETARG_NUMERIC(1);
+ Numeric num1 = (Numeric) PG_DETOAST_DATUM_PACKED(PG_GETARG_DATUM(0));
+ Numeric num2 = (Numeric) PG_DETOAST_DATUM_PACKED(PG_GETARG_DATUM(1));
/*
- * Use cmp_numerics so that this will agree with the comparison operators,
- * particularly as regards comparisons involving NaN.
+ * Use cmp_numerics_packed so that this will agree with the comparison
+ * operators, particularly as regards comparisons involving NaN.
+ * This avoids palloc/memcpy overhead for 1-byte varlena headers.
*/
- if (cmp_numerics(num1, num2) < 0)
- PG_RETURN_NUMERIC(num1);
+ if (cmp_numerics_packed(num1, num2) < 0)
+ PG_RETURN_DATUM(PG_GETARG_DATUM(0));
else
- PG_RETURN_NUMERIC(num2);
+ PG_RETURN_DATUM(PG_GETARG_DATUM(1));
}
@@ -3472,17 +3623,18 @@ numeric_smaller(PG_FUNCTION_ARGS)
Datum
numeric_larger(PG_FUNCTION_ARGS)
{
- Numeric num1 = PG_GETARG_NUMERIC(0);
- Numeric num2 = PG_GETARG_NUMERIC(1);
+ Numeric num1 = (Numeric) PG_DETOAST_DATUM_PACKED(PG_GETARG_DATUM(0));
+ Numeric num2 = (Numeric) PG_DETOAST_DATUM_PACKED(PG_GETARG_DATUM(1));
/*
- * Use cmp_numerics so that this will agree with the comparison operators,
- * particularly as regards comparisons involving NaN.
+ * Use cmp_numerics_packed so that this will agree with the comparison
+ * operators, particularly as regards comparisons involving NaN.
+ * This avoids palloc/memcpy overhead for 1-byte varlena headers.
*/
- if (cmp_numerics(num1, num2) > 0)
- PG_RETURN_NUMERIC(num1);
+ if (cmp_numerics_packed(num1, num2) > 0)
+ PG_RETURN_DATUM(PG_GETARG_DATUM(0));
else
- PG_RETURN_NUMERIC(num2);
+ PG_RETURN_DATUM(PG_GETARG_DATUM(1));
}
@@ -4808,6 +4960,48 @@ makeNumericAggStateCurrentContext(bool calcSumX2)
return state;
}
+/*
+ * Safely initialize a NumericVar from a potentially packed short-header datum.
+ */
+static void
+init_var_from_packed(Numeric num, NumericVar *dest)
+{
+ uint16 header;
+ char *data;
+
+ data = VARDATA_ANY(num);
+ memcpy(&header, data, sizeof(uint16));
+
+ dest->buf = NULL;
+ if ((header & NUMERIC_SIGN_MASK) == NUMERIC_SPECIAL)
+ {
+ dest->ndigits = 0;
+ dest->weight = 0;
+ dest->sign = header & NUMERIC_EXT_SIGN_MASK;
+ dest->dscale = 0;
+ dest->digits = NULL;
+ }
+ else if ((header & 0x8000) != 0)
+ {
+ dest->ndigits = (VARSIZE_ANY_EXHDR(num) - sizeof(uint16)) / sizeof(NumericDigit);
+ dest->weight = (header & NUMERIC_SHORT_WEIGHT_SIGN_MASK ? ~NUMERIC_SHORT_WEIGHT_MASK : 0)
+ | (header & NUMERIC_SHORT_WEIGHT_MASK);
+ dest->sign = (header & NUMERIC_SHORT_SIGN_MASK) ? NUMERIC_NEG : NUMERIC_POS;
+ dest->dscale = (header & NUMERIC_SHORT_DSCALE_MASK) >> NUMERIC_SHORT_DSCALE_SHIFT;
+ dest->digits = (NumericDigit *) (data + sizeof(uint16));
+ }
+ else
+ {
+ int16 weight;
+ memcpy(&weight, data + sizeof(uint16), sizeof(int16));
+ dest->ndigits = (VARSIZE_ANY_EXHDR(num) - sizeof(uint16) - sizeof(int16)) / sizeof(NumericDigit);
+ dest->weight = weight;
+ dest->sign = header & NUMERIC_SIGN_MASK;
+ dest->dscale = header & NUMERIC_DSCALE_MASK;
+ dest->digits = (NumericDigit *) (data + sizeof(uint16) + sizeof(int16));
+ }
+}
+
/*
* Accumulate a new input value for numeric aggregate functions.
*/
@@ -4818,21 +5012,21 @@ do_numeric_accum(NumericAggState *state, Numeric newval)
NumericVar X2;
MemoryContext old_context;
+ /* load processed number in short-lived context */
+ init_var_from_packed(newval, &X);
+
/* Count NaN/infinity inputs separately from all else */
- if (NUMERIC_IS_SPECIAL(newval))
+ if (X.sign == NUMERIC_NAN || X.sign == NUMERIC_PINF || X.sign == NUMERIC_NINF)
{
- if (NUMERIC_IS_PINF(newval))
+ if (X.sign == NUMERIC_PINF)
state->pInfcount++;
- else if (NUMERIC_IS_NINF(newval))
+ else if (X.sign == NUMERIC_NINF)
state->nInfcount++;
else
state->NaNcount++;
return;
}
- /* load processed number in short-lived context */
- init_var_from_num(newval, &X);
-
/*
* Track the highest input dscale that we've seen, to support inverse
* transitions (see do_numeric_discard).
@@ -4888,21 +5082,21 @@ do_numeric_discard(NumericAggState *state, Numeric newval)
NumericVar X2;
MemoryContext old_context;
+ /* load processed number in short-lived context */
+ init_var_from_packed(newval, &X);
+
/* Count NaN/infinity inputs separately from all else */
- if (NUMERIC_IS_SPECIAL(newval))
+ if (X.sign == NUMERIC_NAN || X.sign == NUMERIC_PINF || X.sign == NUMERIC_NINF)
{
- if (NUMERIC_IS_PINF(newval))
+ if (X.sign == NUMERIC_PINF)
state->pInfcount--;
- else if (NUMERIC_IS_NINF(newval))
+ else if (X.sign == NUMERIC_NINF)
state->nInfcount--;
else
state->NaNcount--;
return true;
}
- /* load processed number in short-lived context */
- init_var_from_num(newval, &X);
-
/*
* state->sumX's dscale is the maximum dscale of any of the inputs.
* Removing the last input with that dscale would require us to recompute
@@ -4986,7 +5180,7 @@ numeric_accum(PG_FUNCTION_ARGS)
state = makeNumericAggState(fcinfo, true);
if (!PG_ARGISNULL(1))
- do_numeric_accum(state, PG_GETARG_NUMERIC(1));
+ do_numeric_accum(state, (Numeric) PG_DETOAST_DATUM_PACKED(PG_GETARG_DATUM(1)));
PG_RETURN_POINTER(state);
}
@@ -5078,7 +5272,7 @@ numeric_avg_accum(PG_FUNCTION_ARGS)
state = makeNumericAggState(fcinfo, false);
if (!PG_ARGISNULL(1))
- do_numeric_accum(state, PG_GETARG_NUMERIC(1));
+ do_numeric_accum(state, (Numeric) PG_DETOAST_DATUM_PACKED(PG_GETARG_DATUM(1)));
PG_RETURN_POINTER(state);
}
--
2.34.1
view thread (2+ messages) latest in thread
reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Reply to all the recipients using the --to and --cc options:
reply via email
To: [email protected]
Cc: [email protected]
Subject: Re: [PATCH] Optimize numeric comparisons and aggregations via packed-datum extraction
In-Reply-To: <[email protected]>
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox