From 0b810edcdc5e80ab4ec7d77a8d6687fdedd73b32 Mon Sep 17 00:00:00 2001 From: Haibo Yan Date: Thu, 23 Apr 2026 12:43:31 -0700 Subject: [PATCH v1 3/5] matview: use buffered-insert API for CMV and RMV Adopt the buffered-insert lifecycle API in the transientrel datafill path used by CREATE MATERIALIZED VIEW and REFRESH MATERIALIZED VIEW. The path uses table_buffered_insert_begin()/put()/end() with a NULL flush callback and falls back to the existing single-row table_tuple_insert() path when table_buffered_insert_begin() returns NULL (the table AM does not support buffered inserts) or when the volatile-function guard disables the buffered path. The volatile-function check on the planned target list is retained as a conservative guard for the initial patch series. Concurrent refresh is unchanged. Add focused CMV/RMV regression tests. --- src/backend/commands/matview.c | 99 +++++++++++++---- src/test/regress/expected/matview.out | 152 ++++++++++++++++++++++++++ src/test/regress/sql/matview.sql | 59 ++++++++++ 3 files changed, 291 insertions(+), 19 deletions(-) diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index f7d8007f796..7de4acbdfb1 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -31,6 +31,7 @@ #include "executor/executor.h" #include "executor/spi.h" #include "miscadmin.h" +#include "optimizer/optimizer.h" #include "pgstat.h" #include "rewrite/rewriteHandler.h" #include "storage/lmgr.h" @@ -50,7 +51,9 @@ typedef struct Relation transientrel; /* relation to write to */ CommandId output_cid; /* cmin to insert in output tuples */ uint32 ti_options; /* table_tuple_insert performance options */ - BulkInsertState bistate; /* bulk insert state */ + BulkInsertState bistate; /* bulk insert state (fallback path only) */ + TableBufferedInsertState buffered_state; /* buffered-insert state, or NULL */ + bool use_buffered_insert; /* true if buffered path is eligible */ } DR_transientrel; static int matview_maintenance_depth = 0; @@ -427,6 +430,24 @@ refresh_matview_datafill(DestReceiver *dest, Query *query, /* Plan the query which will generate data for the refresh.
*/ plan = pg_plan_query(query, queryString, CURSOR_OPT_PARALLEL_OK, NULL, NULL); + /* + * Conservative implementation choice: disable the buffered-insert path + * if the planned target list contains volatile functions. + * + * The buffered-insert API contract does not require this check — the + * matview's defining query was parsed at creation time and cannot + * reference the transient target table. This guard is retained for the + * initial patch series as a caller-local conservatism and can be relaxed + * after validation without any API change. + */ + { + DR_transientrel *myState = (DR_transientrel *) dest; + + myState->use_buffered_insert = + !contain_volatile_functions_after_planning( + (Expr *) plan->planTree->targetlist); + } + /* * Use a snapshot with an updated command ID to ensure this query sees * results of any previously executed queries. (This could only matter if @@ -492,7 +513,40 @@ transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) myState->transientrel = transientrel; myState->output_cid = GetCurrentCommandId(true); myState->ti_options = TABLE_INSERT_SKIP_FSM | TABLE_INSERT_FROZEN; - myState->bistate = GetBulkInsertState(); + + if (myState->use_buffered_insert) + { + /* + * Try the buffered-insert path. Pass NULL flush callback — the + * transient table has no indexes, triggers, or per-tuple post-insert + * work during the datafill phase. + */ + myState->ti_options |= TABLE_INSERT_BAS_BULKWRITE; + myState->buffered_state = + table_buffered_insert_begin(transientrel, + myState->output_cid, + myState->ti_options, + NULL, NULL); + + if (myState->buffered_state != NULL) + { + myState->bistate = NULL; + } + else + { + /* AM does not support buffered inserts; fall back */ + myState->bistate = GetBulkInsertState(); + } + } + else + { + /* + * Buffered insertion not selected for this datafill. Currently this + * is reached when the conservative volatile-function guard fires. 
+ */ + myState->buffered_state = NULL; + myState->bistate = GetBulkInsertState(); + } /* * Valid smgr_targblock implies something already wrote to the relation. @@ -509,20 +563,19 @@ transientrel_receive(TupleTableSlot *slot, DestReceiver *self) { DR_transientrel *myState = (DR_transientrel *) self; - /* - * Note that the input slot might not be of the type of the target - * relation. That's supported by table_tuple_insert(), but slightly less - * efficient than inserting with the right slot - but the alternative - * would be to copy into a slot of the right type, which would not be - * cheap either. This also doesn't allow accessing per-AM data (say a - * tuple's xmin), but since we don't do that here... - */ - - table_tuple_insert(myState->transientrel, - slot, - myState->output_cid, - myState->ti_options, - myState->bistate); + /* Both paths accept the caller-provided slot directly. */ + if (myState->buffered_state != NULL) + { + table_buffered_insert_put(myState->buffered_state, slot); + } + else + { + table_tuple_insert(myState->transientrel, + slot, + myState->output_cid, + myState->ti_options, + myState->bistate); + } /* We know this is a newly created relation, so there are no indexes */ @@ -537,9 +590,17 @@ transientrel_shutdown(DestReceiver *self) { DR_transientrel *myState = (DR_transientrel *) self; - FreeBulkInsertState(myState->bistate); - - table_finish_bulk_insert(myState->transientrel, myState->ti_options); + if (myState->buffered_state != NULL) + { + /* end() flushes remaining tuples and subsumes finish_bulk_insert */ + table_buffered_insert_end(myState->buffered_state); + myState->buffered_state = NULL; + } + else + { + FreeBulkInsertState(myState->bistate); + table_finish_bulk_insert(myState->transientrel, myState->ti_options); + } /* close transientrel, but keep lock until commit */ table_close(myState->transientrel, NoLock); diff --git a/src/test/regress/expected/matview.out b/src/test/regress/expected/matview.out index 
0355720dfc6..330ea4989f2 100644 --- a/src/test/regress/expected/matview.out +++ b/src/test/regress/expected/matview.out @@ -699,3 +699,155 @@ NOTICE: relation "matview_ine_tab" already exists, skipping (0 rows) DROP MATERIALIZED VIEW matview_ine_tab; +-- +-- Tests for CMV/RMV with buffered-insert path. +-- +-- These tests verify correctness under both the buffered path (no volatile +-- functions) and the fallback single-row path (volatile functions present). +-- Path selection is not directly observable in SQL output. +-- +-- CMV: basic correctness +CREATE MATERIALIZED VIEW mv_buffered_1 AS + SELECT g AS a, g * 10 AS b FROM generate_series(1, 5) g; +SELECT count(*) FROM mv_buffered_1; + count +------- + 5 +(1 row) + +SELECT * FROM mv_buffered_1 ORDER BY a; + a | b +---+---- + 1 | 10 + 2 | 20 + 3 | 30 + 4 | 40 + 5 | 50 +(5 rows) + +-- RMV: refresh repopulates correctly +REFRESH MATERIALIZED VIEW mv_buffered_1; +SELECT count(*) FROM mv_buffered_1; + count +------- + 5 +(1 row) + +SELECT * FROM mv_buffered_1 ORDER BY a; + a | b +---+---- + 1 | 10 + 2 | 20 + 3 | 30 + 4 | 40 + 5 | 50 +(5 rows) + +DROP MATERIALIZED VIEW mv_buffered_1; +-- CMV + RMV: bulk case to exercise auto-flush (>1000 rows) +CREATE MATERIALIZED VIEW mv_buffered_bulk AS + SELECT g AS a FROM generate_series(1, 2500) g; +SELECT count(*) FROM mv_buffered_bulk; + count +------- + 2500 +(1 row) + +SELECT min(a), max(a) FROM mv_buffered_bulk; + min | max +-----+------ + 1 | 2500 +(1 row) + +REFRESH MATERIALIZED VIEW mv_buffered_bulk; +SELECT count(*) FROM mv_buffered_bulk; + count +------- + 2500 +(1 row) + +SELECT min(a), max(a) FROM mv_buffered_bulk; + min | max +-----+------ + 1 | 2500 +(1 row) + +DROP MATERIALIZED VIEW mv_buffered_bulk; +-- Wide tuples: verify no regression +CREATE MATERIALIZED VIEW mv_buffered_wide AS + SELECT g AS id, + repeat('x', 200) AS col1, + repeat('y', 200) AS col2, + repeat('z', 200) AS col3 + FROM generate_series(1, 100) g; +SELECT count(*) FROM mv_buffered_wide; + count 
+------- + 100 +(1 row) + +SELECT id, length(col1), length(col2), length(col3) + FROM mv_buffered_wide WHERE id IN (1, 50, 100) ORDER BY id; + id | length | length | length +-----+--------+--------+-------- + 1 | 200 | 200 | 200 + 50 | 200 | 200 | 200 + 100 | 200 | 200 | 200 +(3 rows) + +DROP MATERIALIZED VIEW mv_buffered_wide; +-- Volatile-function fallback: random() triggers conservative guard. +-- Result must still be correct through the single-row path. +CREATE MATERIALIZED VIEW mv_volatile AS + SELECT g AS a, random() AS r FROM generate_series(1, 10) g; +SELECT count(*) FROM mv_volatile; + count +------- + 10 +(1 row) + +SELECT a FROM mv_volatile ORDER BY a; + a +---- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 +(10 rows) + +REFRESH MATERIALIZED VIEW mv_volatile; +SELECT count(*) FROM mv_volatile; + count +------- + 10 +(1 row) + +SELECT a FROM mv_volatile ORDER BY a; + a +---- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 +(10 rows) + +DROP MATERIALIZED VIEW mv_volatile; +-- WITH NO DATA: unchanged behavior +CREATE MATERIALIZED VIEW mv_nodata AS SELECT 1 AS a WITH NO DATA; +SELECT count(*) FROM mv_nodata; -- error: not populated +ERROR: materialized view "mv_nodata" has not been populated +HINT: Use the REFRESH MATERIALIZED VIEW command. +REFRESH MATERIALIZED VIEW mv_nodata WITH NO DATA; +DROP MATERIALIZED VIEW mv_nodata; diff --git a/src/test/regress/sql/matview.sql b/src/test/regress/sql/matview.sql index 934426b9ae8..43ceee50e90 100644 --- a/src/test/regress/sql/matview.sql +++ b/src/test/regress/sql/matview.sql @@ -318,3 +318,62 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) CREATE MATERIALIZED VIEW IF NOT EXISTS matview_ine_tab AS SELECT 1 / 0 WITH NO DATA; -- ok DROP MATERIALIZED VIEW matview_ine_tab; + +-- +-- Tests for CMV/RMV with buffered-insert path. +-- +-- These tests verify correctness under both the buffered path (no volatile +-- functions) and the fallback single-row path (volatile functions present). 
+-- Path selection is not directly observable in SQL output. +-- + +-- CMV: basic correctness +CREATE MATERIALIZED VIEW mv_buffered_1 AS + SELECT g AS a, g * 10 AS b FROM generate_series(1, 5) g; +SELECT count(*) FROM mv_buffered_1; +SELECT * FROM mv_buffered_1 ORDER BY a; + +-- RMV: refresh repopulates correctly +REFRESH MATERIALIZED VIEW mv_buffered_1; +SELECT count(*) FROM mv_buffered_1; +SELECT * FROM mv_buffered_1 ORDER BY a; +DROP MATERIALIZED VIEW mv_buffered_1; + +-- CMV + RMV: bulk case to exercise auto-flush (>1000 rows) +CREATE MATERIALIZED VIEW mv_buffered_bulk AS + SELECT g AS a FROM generate_series(1, 2500) g; +SELECT count(*) FROM mv_buffered_bulk; +SELECT min(a), max(a) FROM mv_buffered_bulk; +REFRESH MATERIALIZED VIEW mv_buffered_bulk; +SELECT count(*) FROM mv_buffered_bulk; +SELECT min(a), max(a) FROM mv_buffered_bulk; +DROP MATERIALIZED VIEW mv_buffered_bulk; + +-- Wide tuples: verify no regression +CREATE MATERIALIZED VIEW mv_buffered_wide AS + SELECT g AS id, + repeat('x', 200) AS col1, + repeat('y', 200) AS col2, + repeat('z', 200) AS col3 + FROM generate_series(1, 100) g; +SELECT count(*) FROM mv_buffered_wide; +SELECT id, length(col1), length(col2), length(col3) + FROM mv_buffered_wide WHERE id IN (1, 50, 100) ORDER BY id; +DROP MATERIALIZED VIEW mv_buffered_wide; + +-- Volatile-function fallback: random() triggers conservative guard. +-- Result must still be correct through the single-row path. 
+CREATE MATERIALIZED VIEW mv_volatile AS + SELECT g AS a, random() AS r FROM generate_series(1, 10) g; +SELECT count(*) FROM mv_volatile; +SELECT a FROM mv_volatile ORDER BY a; +REFRESH MATERIALIZED VIEW mv_volatile; +SELECT count(*) FROM mv_volatile; +SELECT a FROM mv_volatile ORDER BY a; +DROP MATERIALIZED VIEW mv_volatile; + +-- WITH NO DATA: unchanged behavior +CREATE MATERIALIZED VIEW mv_nodata AS SELECT 1 AS a WITH NO DATA; +SELECT count(*) FROM mv_nodata; -- error: not populated +REFRESH MATERIALIZED VIEW mv_nodata WITH NO DATA; +DROP MATERIALIZED VIEW mv_nodata; -- 2.52.0