public inbox for [email protected]
help / color / mirror / Atom feedFrom: Maxim Orlov <[email protected]>
To: wenhui qiu <[email protected]>
Cc: Heikki Linnakangas <[email protected]>
Cc: Alexander Korotkov <[email protected]>
Cc: Postgres hackers <[email protected]>
Subject: Re: POC: make mxidoff 64 bits
Date: Wed, 29 Jan 2025 17:04:28 +0300
Message-ID: <CACG=ezYbYO_KHWdeDedbDcY0tOS0JfaqBxG3=bG5+DdsDK4MpQ@mail.gmail.com> (raw)
In-Reply-To: <CAGjGUA+BfcWyccNN4=tHsW_E-koRxbg8h8ut6hjvPsHMgmek6w@mail.gmail.com>
References: <CACG=ezaWg7_nt-8ey4aKv2w9LcuLthHknwCawmBgEeTnJrJTcw@mail.gmail.com>
<CACG=ezaMncd0-BcGHBgsSR2eqHfrz9WznHGLKX8biz6zu-azGw@mail.gmail.com>
<[email protected]>
<CACG=ezb9XTvd3ZmS0y8gUunx_wBBdJO7ou+BfCOnnA5jE-11vg@mail.gmail.com>
<CACG=ezYFNqGjsxF6Vb2CHF6JzKcjhAFauaFm9js0nu_3Ngcdkw@mail.gmail.com>
<CAGjGUA+dcV7veaCV1H65vCNsbS++nT8=ho772gDvsXUW9H7eXQ@mail.gmail.com>
<CACG=ezYThNkf8QsDA-aQfEFEkqn2L=_uUL83z0vJstPRasbZqg@mail.gmail.com>
<[email protected]>
<CACG=ezYtCatcRODS-ZkwhcxuqBKCuhEsZGBruw=dGCLoepF+ZA@mail.gmail.com>
<[email protected]>
<CACG=ezb680eb=JXh1ns=t5eGH3h9y-uTfT4tf3Xc8t2UH2q6tQ@mail.gmail.com>
<CACG=ezZGQFBb0yepka8hU2BmJ48ujt3xa+aYLNL0BQPx0vqwZg@mail.gmail.com>
<CACG=ezajc_Pcqmy6fcq-N8+LzCRMzOzJzez2_BgHEu-6RVJtKQ@mail.gmail.com>
<[email protected]>
<CACG=ezbKwypBp=14q9+hMQApus3=1hKxJ9x1+KinUhtT48570Q@mail.gmail.com>
<[email protected]>
<CACG=ezZwdvsijzuXE3hex3xHcoz75EQYBXRTsQJVwbo5J5sS3g@mail.gmail.com>
<CACG=ezbs912S58=uR17b4w8uuWv1=OcCRaTW_OWdFm4+tXZA6w@mail.gmail.com>
<CAGjGUA+BfcWyccNN4=tHsW_E-koRxbg8h8ut6hjvPsHMgmek6w@mail.gmail.com>
Here is a v13 version with small changes to make cf bot happy.
--
Best regards,
Maxim Orlov.
From a989c58abfabb07c8778de339d617690f6654f79 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <[email protected]>
Date: Tue, 19 Nov 2024 17:08:10 +0300
Subject: [PATCH v13 5/7] TEST: add src/bin/pg_upgrade/t/005_offset.pl
---
src/bin/pg_upgrade/t/005_offset.pl | 563 +++++++++++++++++++++++++++++
1 file changed, 563 insertions(+)
create mode 100644 src/bin/pg_upgrade/t/005_offset.pl
diff --git a/src/bin/pg_upgrade/t/005_offset.pl b/src/bin/pg_upgrade/t/005_offset.pl
new file mode 100644
index 0000000000..df84186de4
--- /dev/null
+++ b/src/bin/pg_upgrade/t/005_offset.pl
@@ -0,0 +1,563 @@
+# Copyright (c) 2024, PostgreSQL Global Development Group
+
+use strict;
+use warnings FATAL => 'all';
+
+use File::Find qw(find);
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# This pair of calls will create significantly more member segments than offset
+# segments.
+# Create table $tbl on $node (autovacuum disabled) and seed it with 50 rows.
+sub prep
+{
+	my ($node, $tbl) = @_;
+	my $sql = join('',
+		"CREATE TABLE ${tbl} (I INT PRIMARY KEY, N_UPDATED INT) ",
+		" WITH (AUTOVACUUM_ENABLED=FALSE);",
+		"INSERT INTO ${tbl} SELECT G, 0 FROM GENERATE_SERIES(1, 50) G;");
+	$node->safe_psql('postgres', $sql);
+}
+
+# Drive $tbl on $node from a pool of concurrently open psql sessions.  Each
+# step commits one session's transaction, opens a new one and then either
+# updates a row or takes FOR KEY SHARE locks on the whole table.
+sub fill
+{
+	my ($node, $tbl) = @_;
+
+	my $nclients = 50;
+	my $update_every = 90;
+	my @connections = ();
+
+	# Open exactly $nclients sessions: the loop below only ever addresses
+	# indexes 0 .. $nclients - 1, so one more would just sit idle.
+	for (1 .. $nclients)
+	{
+		my $conn = $node->background_psql('postgres');
+		$conn->query_safe("BEGIN");
+		push(@connections, $conn);
+	}
+
+	for my $i (0 .. 19999)
+	{
+		my $conn = $connections[$i % $nclients];
+
+		$conn->query_safe("COMMIT;");
+		$conn->query_safe("BEGIN");
+
+		if ($i % $update_every == 0)
+		{
+			# NOTE(review): ${i} % 50 can be 0, which matches no row if
+			# the table holds I = 1..50 as created by prep() -- confirm.
+			$conn->query_safe("UPDATE ${tbl} SET "
+				  . "N_UPDATED = N_UPDATED + 1 "
+				  . "WHERE I = ${i} % 50");
+		}
+		else
+		{
+			$conn->query_safe("SELECT * FROM ${tbl} FOR KEY SHARE");
+		}
+	}
+
+	$_->quit() for @connections;
+}
+
+# This pair of calls will create more or less the same number of member and
+# offset segments.
+# Create table $tbl and the MXIDFILLER procedure, then insert 1024 rows.
+sub prep2
+{
+	my ($node, $tbl) = @_;
+	my @sql = (
+		"CREATE TABLE ${tbl}(BAR INT PRIMARY KEY, BAZ INT); ",
+		"CREATE OR REPLACE PROCEDURE MXIDFILLER(N_STEPS INT DEFAULT 1000) ",
+		"LANGUAGE PLPGSQL ",
+		"AS \$\$ ",
+		"BEGIN ",
+		" FOR I IN 1..N_STEPS LOOP ",
+		" UPDATE ${tbl} SET BAZ = RANDOM(1, 1000) ",
+		" WHERE BAR IN (SELECT BAR FROM ${tbl} ",
+		" TABLESAMPLE BERNOULLI(80)); ",
+		" COMMIT; ",
+		" END LOOP; ",
+		"END; \$\$; ",
+		"INSERT INTO ${tbl} (BAR, BAZ) ",
+		"SELECT ID, ID FROM GENERATE_SERIES(1, 1024) ID;");
+	$node->safe_psql('postgres', join('', @sql));
+}
+
+# Hold a prepared FOR KEY SHARE transaction on $tbl while MXIDFILLER is
+# called with 365 * $scale steps (scale defaults to 1), then commit it.
+sub fill2
+{
+	my ($node, $tbl, $scale) = @_;
+	$scale = 1 unless defined $scale;
+	my $sql = "BEGIN; "
+	  . "SELECT * FROM ${tbl} FOR KEY SHARE; "
+	  . "PREPARE TRANSACTION 'A'; "
+	  . "CALL MXIDFILLER((365 * ${scale})::int); "
+	  . "COMMIT PREPARED 'A';";
+	$node->safe_psql('postgres', $sql);
+}
+
+
+# generate around 2 offset segments and 55 member segments
+sub mxid_gen1
+{
+	my ($node, $tbl) = @_;
+
+	# Build the table, run the multi-session load against it, then
+	# issue a CHECKPOINT.
+	prep($node, $tbl);
+	fill($node, $tbl);
+	$node->safe_psql('postgres', 'CHECKPOINT');
+}
+
+# generate around 10 offset segments and 12 member segments
+sub mxid_gen2
+{
+	my ($node, $tbl, $scale) = @_;
+	$scale //= 1;
+
+	# Build the table and procedure, run the load at the requested scale,
+	# then issue a CHECKPOINT.
+	prep2($node, $tbl);
+	fill2($node, $tbl, $scale);
+	$node->safe_psql('postgres', 'CHECKPOINT');
+}
+
+# Fetch the latest checkpoint's multixact values for $node.
+#
+# Runs pg_controldata from the node's bindir against its data directory and
+# returns the list (oldestMultiXid, NextMultiXactId, NextMultiOffset).  Each
+# matching control-data line is also logged through note().  Dies if any of
+# the three values cannot be found in the output.
+sub multi_bounds
+{
+	my ($node) = @_;
+	my $path = $node->config_data('--bindir');
+	my ($stdout) = run_command([
+		$path . '/pg_controldata',
+		$node->data_dir
+	]);
+
+	# Control-data labels to look for, in the order they are checked below.
+	my @labels = qw(NextMultiXactId oldestMultiXid NextMultiOffset);
+	my %value_of;
+
+	LINE:
+	foreach my $line (split("\n", $stdout))
+	{
+		foreach my $label (@labels)
+		{
+			# Plain match without /g: every line is tested from its start,
+			# so no pos() state must survive between the three patterns.
+			next
+			  unless $line =~ /^Latest checkpoint's \Q$label\E:\s*(.*)$/;
+
+			$value_of{$label} = $1;
+
+			# Use note() so the trace is emitted as a TAP comment.
+			note(">>> @ node " . $node->name . ", " . $line);
+		}
+
+		# Stop scanning as soon as all three values have been seen.
+		last LINE if scalar(keys %value_of) == scalar(@labels);
+	}
+
+	foreach my $label (@labels)
+	{
+		die "Latest checkpoint's $label not found in control file!\n"
+		  unless defined($value_of{$label});
+	}
+
+	return (
+		$value_of{oldestMultiXid},
+		$value_of{NextMultiXactId},
+		$value_of{NextMultiOffset}
+	);
+}
+
+# Create node from existing bins.
+sub create_new_node
+{
+	# Forward name and parameters untouched; the leading 0 tells
+	# create_node() not to take the install path from the environment.
+	return create_node(0, @_);
+}
+
+# Create node from ENV oldinstall
+sub create_old_node
+{
+	# The old cluster's binaries are located through the "oldinstall"
+	# environment variable, so refuse to continue without it.
+	die "oldinstall is not defined"
+	  unless defined($ENV{oldinstall});
+
+	# Forward name and parameters untouched; the leading 1 tells
+	# create_node() to take the install path from the environment.
+	return create_node(1, @_);
+}
+
+# Initialise a cluster called $name, fill it with multixact data and return
+# it stopped.  $install_path_from_env selects the binaries from
+# $ENV{oldinstall}; %params may carry scale (default 1), multi and offset
+# (passed to initdb as -m / -o start values).
+sub create_node
+{
+	my ($install_path_from_env, $name, %params) = @_;
+	my $scale = $params{scale} // 1;
+	my $multi = $params{multi};
+	my $offset = $params{offset};
+
+	my @new_args = ($name);
+	push(@new_args, install_path => $ENV{oldinstall})
+	  if $install_path_from_env;
+	my $node = PostgreSQL::Test::Cluster->new(@new_args);
+
+	$node->init(
+		force_initdb => 1,
+		extra => [
+			$multi ? ('-m', $multi) : (),
+			$offset ? ('-o', $offset) : (),
+			'-k',
+		]);
+
+	# Fixup MOX patch quirk
+	unlink $node->data_dir . '/pg_multixact/offsets/0000' if $multi;
+	unlink $node->data_dir . '/pg_multixact/members/0000' if $offset;
+
+	# append_conf() takes (file name, text): the setting has to be written
+	# into postgresql.conf rather than into a stray file named "fsync".
+	$node->append_conf('postgresql.conf', 'fsync = off');
+	$node->append_conf('postgresql.conf', 'max_prepared_transactions = 2');
+
+	$node->start();
+	mxid_gen2($node, 'FOO', $scale);
+	mxid_gen1($node, 'BAR');    # mxid_gen1() takes no scale argument
+	$node->restart();
+	# Read both tables back after the restart, just in case...
+	$node->safe_psql('postgres', q(SELECT * FROM FOO));
+	$node->safe_psql('postgres', q(SELECT * FROM BAR));
+	$node->safe_psql('postgres', q(CHECKPOINT));
+	$node->stop();
+	return $node;
+}
+
+# Upgrade $oldnode into $newnode with pg_upgrade: first a --check dry run,
+# then the real upgrade in --copy mode.  Afterwards both clusters are
+# started, the contents of tables FOO and BAR are compared between them,
+# the clusters are stopped again and multi_bounds() is run on each.
+# Both nodes are expected to be initialised and stopped on entry.
+sub do_upgrade
+{
+	my ($oldnode, $newnode) = @_;
+
+	# Arguments shared by both pg_upgrade invocations.
+	my @upgrade_cmd = (
+		'pg_upgrade', '--no-sync',
+		'-d', $oldnode->data_dir,
+		'-D', $newnode->data_dir,
+		'-b', $oldnode->config_data('--bindir'),
+		'-B', $newnode->config_data('--bindir'),
+		'-s', $newnode->host,
+		'-p', $oldnode->port,
+		'-P', $newnode->port
+	);
+
+	# Give the two runs distinct test names so that a failure of the
+	# dry run can be told apart from a failure of the real upgrade.
+	command_ok(
+		[ @upgrade_cmd, '--check' ],
+		'run of pg_upgrade --check');
+
+	command_ok(
+		[ @upgrade_cmd, '--copy' ],
+		'run of pg_upgrade');
+
+	$oldnode->start();
+	$newnode->start();
+
+	# Every table must read back identically from both clusters.
+	foreach my $tbl (qw(FOO BAR))
+	{
+		my $query = "SELECT * FROM $tbl";
+		my $old_rows = $oldnode->safe_psql('postgres', $query);
+		my $new_rows = $newnode->safe_psql('postgres', $query);
+		is($old_rows, $new_rows, 'select ' . lc($tbl) . ' eq');
+	}
+
+	$oldnode->stop();
+	$newnode->stop();
+
+	# Log the resulting multixact bounds of both clusters.
+	multi_bounds($oldnode);
+	multi_bounds($newnode);
+}
+
+my @TESTS = (
+ # tests without ENV oldinstall
+ 0, 1, 2, 3, 4, 5, 6,
+ # tests with "real" pg_upgrade
+ 100, 101, 102, 103, 104, 105, 106,
+ # self upgrade
+ 1000,
+);
+
+# =============================================================================
+# Basic sanity tests on a NEW bin
+# =============================================================================
+
+# starts from the zero
+SKIP:
+{
+ my $TEST_NO = 0;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $node = create_new_node('simple_mo',
+ scale => 1);
+ multi_bounds($node);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# multi starts from the value
+SKIP:
+{
+ my $TEST_NO = 1;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $node = create_new_node('simple_Mo',
+ scale => 1.15,
+ multi => '0x123400');
+ multi_bounds($node);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# offsets starts from the value
+SKIP:
+{
+ my $TEST_NO = 2;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $node = create_new_node('simple_mO',
+ scale => 1.15,
+ offset => '0x432100');
+ multi_bounds($node);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# multi and offsets starts from the value
+SKIP:
+{
+ my $TEST_NO = 3;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $node = create_new_node('simple_MO',
+ scale => 1.15,
+ multi => '0xDEAD00', offset => '0xBEEF00');
+ multi_bounds($node);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# multi starts from the value, multi wrap
+SKIP:
+{
+ my $TEST_NO = 4;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $node = create_new_node('simple_Mo_wrap',
+ scale => 1.15,
+ multi => '0xFFFF7000');
+ multi_bounds($node);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# offsets starts from the value, offsets wrap
+SKIP:
+{
+ my $TEST_NO = 5;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $node = create_new_node('simple_mO_wrap',
+ scale => 1.15,
+ offset => '0xFFFFFC00');
+ multi_bounds($node);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# multi starts from the value, offsets starts from the value,
+# multi wrap, offsets wrap
+SKIP:
+{
+ my $TEST_NO = 6;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $node = create_new_node('simple_MO_wrap',
+ scale => 1.15,
+ multi => '0xFFFF7000', offset => '0xFFFFFC00');
+ multi_bounds($node);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# =============================================================================
+# pg_upgrade tests
+# =============================================================================
+
+# starts from the zero
+SKIP:
+{
+ my $TEST_NO = 100;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $dbname = 'mo';
+ my $oldnode = create_old_node("old_$dbname",
+ scale => 1);
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ $newnode->init();
+
+ do_upgrade($oldnode, $newnode);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# multi starts from the value
+SKIP:
+{
+ my $TEST_NO = 101;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $dbname = 'Mo';
+ my $oldnode = create_old_node("old_$dbname",
+ scale => 1.2,
+ multi => '0x123400');
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ $newnode->init();
+
+ do_upgrade($oldnode, $newnode);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# offsets starts from the value
+SKIP:
+{
+ my $TEST_NO = 102;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $dbname = 'mO';
+ my $oldnode = create_old_node("old_$dbname",
+ scale => 1.2,
+ offset => '0x432100');
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ $newnode->init();
+
+ do_upgrade($oldnode, $newnode);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# multi and offsets starts from the value
+SKIP:
+{
+ my $TEST_NO = 103;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $dbname = 'MO';
+ my $oldnode = create_old_node("old_$dbname",
+ scale => 1.2,
+ multi => '0xDEAD00', offset => '0xBEEF00');
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ $newnode->init();
+
+ do_upgrade($oldnode, $newnode);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# multi starts from the value, multi wrap
+SKIP:
+{
+ my $TEST_NO = 104;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $dbname = 'Mo_wrap';
+ my $oldnode = create_old_node("old_$dbname",
+ scale => 1.2,
+ multi => '0xFFFF7000');
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ $newnode->init();
+
+ do_upgrade($oldnode, $newnode);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# offsets starts from the value, offsets wrap
+SKIP:
+{
+ my $TEST_NO = 105;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $dbname = 'mO_wrap';
+ my $oldnode = create_old_node("old_$dbname",
+ scale => 1.2,
+ offset => '0xFFFFFC00');
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ $newnode->init();
+
+ do_upgrade($oldnode, $newnode);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# multi starts from the value, offsets starts from the value,
+# multi wrap, offsets wrap
+SKIP:
+{
+ my $TEST_NO = 106;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $dbname = 'MO_wrap';
+ my $oldnode = create_old_node("old_$dbname",
+ scale => 1.2,
+ multi => '0xFFFF7000', offset => '0xFFFFFC00');
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ $newnode->init();
+
+ do_upgrade($oldnode, $newnode);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# =============================================================================
+# Self upgrade
+# =============================================================================
+
+# starts from the zero
+SKIP:
+{
+ my $TEST_NO = 1000;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $dbname = 'self_upgrade';
+ my $oldnode = create_new_node("old_$dbname",
+ scale => 1);
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ $newnode->init();
+
+ do_upgrade($oldnode, $newnode);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+done_testing();
--
2.43.0
From be906a9c2161e6972a396a9d283bb76ca023a808 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <[email protected]>
Date: Wed, 4 May 2022 15:53:36 +0300
Subject: [PATCH v13 4/7] TEST: initdb option to initialize cluster with
non-standard xid/mxid/mxoff
To date, testing database cluster wraparound was not easy, as initdb has always
initialized it with the default xid/mxid/mxoff. The option to specify any valid
xid/mxid/mxoff at cluster startup will make these things easier.
Author: Maxim Orlov <[email protected]>
Author: Pavel Borisov <[email protected]>
Author: Svetlana Derevyanko <[email protected]>
Discussion: https://www.postgresql.org/message-id/flat/CACG%3Dezaa4vqYjJ16yoxgrpa-%3DgXnf0Vv3Ey9bjGrRRFN2YyWFQ%4...
---
src/backend/access/transam/clog.c | 21 +++++
src/backend/access/transam/multixact.c | 53 ++++++++++++
src/backend/access/transam/subtrans.c | 8 +-
src/backend/access/transam/xlog.c | 15 ++--
src/backend/bootstrap/bootstrap.c | 50 +++++++++++-
src/backend/main/main.c | 6 ++
src/backend/postmaster/postmaster.c | 14 +++-
src/backend/tcop/postgres.c | 53 +++++++++++-
src/bin/initdb/initdb.c | 107 ++++++++++++++++++++++++-
src/bin/initdb/t/001_initdb.pl | 60 ++++++++++++++
src/include/access/xlog.h | 3 +
src/include/c.h | 4 +
src/include/catalog/pg_class.h | 2 +-
13 files changed, 382 insertions(+), 14 deletions(-)
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 0d556c00b8..89516e9f52 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -834,6 +834,7 @@ BootStrapCLOG(void)
{
int slotno;
LWLock *lock = SimpleLruGetBankLock(XactCtl, 0);
+ int64 pageno;
LWLockAcquire(lock, LW_EXCLUSIVE);
@@ -844,6 +845,26 @@ BootStrapCLOG(void)
SimpleLruWritePage(XactCtl, slotno);
Assert(!XactCtl->shared->page_dirty[slotno]);
+ pageno = TransactionIdToPage(XidFromFullTransactionId(TransamVariables->nextXid));
+ if (pageno != 0)
+ {
+ LWLock *nextlock = SimpleLruGetBankLock(XactCtl, pageno);
+
+ if (nextlock != lock)
+ {
+ LWLockRelease(lock);
+ LWLockAcquire(nextlock, LW_EXCLUSIVE);
+ lock = nextlock;
+ }
+
+ /* Create and zero the first page of the commit log */
+ slotno = ZeroCLOGPage(pageno, false);
+
+ /* Make sure it's written out */
+ SimpleLruWritePage(XactCtl, slotno);
+ Assert(!XactCtl->shared->page_dirty[slotno]);
+ }
+
LWLockRelease(lock);
}
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index d63ae17330..70c9d2f6ee 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1815,6 +1815,7 @@ BootStrapMultiXact(void)
{
int slotno;
LWLock *lock;
+ int64 pageno;
lock = SimpleLruGetBankLock(MultiXactOffsetCtl, 0);
LWLockAcquire(lock, LW_EXCLUSIVE);
@@ -1826,6 +1827,26 @@ BootStrapMultiXact(void)
SimpleLruWritePage(MultiXactOffsetCtl, slotno);
Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
+ pageno = MultiXactIdToOffsetPage(MultiXactState->nextMXact);
+ if (pageno != 0)
+ {
+ LWLock *nextlock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
+
+ if (nextlock != lock)
+ {
+ LWLockRelease(lock);
+ LWLockAcquire(nextlock, LW_EXCLUSIVE);
+ lock = nextlock;
+ }
+
+ /* Create and zero the first page of the offsets log */
+ slotno = ZeroMultiXactOffsetPage(pageno, false);
+
+ /* Make sure it's written out */
+ SimpleLruWritePage(MultiXactOffsetCtl, slotno);
+ Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
+ }
+
LWLockRelease(lock);
lock = SimpleLruGetBankLock(MultiXactMemberCtl, 0);
@@ -1838,7 +1859,39 @@ BootStrapMultiXact(void)
SimpleLruWritePage(MultiXactMemberCtl, slotno);
Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
+ pageno = MXOffsetToMemberPage(MultiXactState->nextOffset);
+ if (pageno != 0)
+ {
+ LWLock *nextlock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
+
+ if (nextlock != lock)
+ {
+ LWLockRelease(lock);
+ LWLockAcquire(nextlock, LW_EXCLUSIVE);
+ lock = nextlock;
+ }
+
+ /* Create and zero the first page of the members log */
+ slotno = ZeroMultiXactMemberPage(pageno, false);
+
+ /* Make sure it's written out */
+ SimpleLruWritePage(MultiXactMemberCtl, slotno);
+ Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
+ }
+
LWLockRelease(lock);
+
+ /*
+ * If we're not starting from offset zero, initialize a dummy multixact to
+ * avoid an overly long loop in PerformMembersTruncation().
+ */
+ if (MultiXactState->nextOffset > 0 && MultiXactState->nextMXact > 0)
+ {
+ RecordNewMultiXact(FirstMultiXactId,
+ MultiXactState->nextOffset, 0, NULL);
+ RecordNewMultiXact(MultiXactState->nextMXact,
+ MultiXactState->nextOffset, 0, NULL);
+ }
}
/*
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index 15153618fa..218675fa60 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -270,12 +270,16 @@ void
BootStrapSUBTRANS(void)
{
int slotno;
- LWLock *lock = SimpleLruGetBankLock(SubTransCtl, 0);
+ LWLock *lock;
+ int64 pageno;
+
+ pageno = TransactionIdToPage(XidFromFullTransactionId(TransamVariables->nextXid));
+ lock = SimpleLruGetBankLock(SubTransCtl, pageno);
LWLockAcquire(lock, LW_EXCLUSIVE);
/* Create and zero the first page of the subtrans log */
- slotno = ZeroSUBTRANSPage(0);
+ slotno = ZeroSUBTRANSPage(pageno);
/* Make sure it's written out */
SimpleLruWritePage(SubTransCtl, slotno);
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a813a090fa..9f78a3e34a 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -136,6 +136,10 @@ int max_slot_wal_keep_size_mb = -1;
int wal_decode_buffer_size = 512 * 1024;
bool track_wal_io_timing = false;
+TransactionId start_xid = FirstNormalTransactionId;
+MultiXactId start_mxid = FirstMultiXactId;
+MultiXactOffset start_mxoff = 0;
+
#ifdef WAL_DEBUG
bool XLOG_DEBUG = false;
#endif
@@ -5080,13 +5084,14 @@ BootStrapXLOG(uint32 data_checksum_version)
checkPoint.fullPageWrites = fullPageWrites;
checkPoint.wal_level = wal_level;
checkPoint.nextXid =
- FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
+ FullTransactionIdFromEpochAndXid(0, Max(FirstNormalTransactionId,
+ start_xid));
checkPoint.nextOid = FirstGenbkiObjectId;
- checkPoint.nextMulti = FirstMultiXactId;
- checkPoint.nextMultiOffset = 1;
- checkPoint.oldestXid = FirstNormalTransactionId;
+ checkPoint.nextMulti = Max(FirstMultiXactId, start_mxid);
+ checkPoint.nextMultiOffset = Max(1, start_mxoff);
+ checkPoint.oldestXid = XidFromFullTransactionId(checkPoint.nextXid);
checkPoint.oldestXidDB = Template1DbOid;
- checkPoint.oldestMulti = FirstMultiXactId;
+ checkPoint.oldestMulti = checkPoint.nextMulti;
checkPoint.oldestMultiDB = Template1DbOid;
checkPoint.oldestCommitTsXid = InvalidTransactionId;
checkPoint.newestCommitTsXid = InvalidTransactionId;
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index 359f58a8f9..b697138b7e 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -218,7 +218,7 @@ BootstrapModeMain(int argc, char *argv[], bool check_only)
argv++;
argc--;
- while ((flag = getopt(argc, argv, "B:c:d:D:Fkr:X:-:")) != -1)
+ while ((flag = getopt(argc, argv, "B:c:d:D:Fkm:o:r:X:x:-:")) != -1)
{
switch (flag)
{
@@ -286,12 +286,60 @@ BootstrapModeMain(int argc, char *argv[], bool check_only)
case 'k':
bootstrap_data_checksum_version = PG_DATA_CHECKSUM_VERSION;
break;
+ case 'm':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_mxid = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartMultiXactIdIsValid(start_mxid))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid initial database cluster multixact id")));
+ }
+ }
+ break;
+ case 'o':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_mxoff = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartMultiXactOffsetIsValid(start_mxoff))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid initial database cluster multixact offset")));
+ }
+ }
+ break;
case 'r':
strlcpy(OutputFileName, optarg, MAXPGPATH);
break;
case 'X':
SetConfigOption("wal_segment_size", optarg, PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT);
break;
+ case 'x':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_xid = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartTransactionIdIsValid(start_xid))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid initial database cluster xid value")));
+ }
+ }
+ break;
default:
write_stderr("Try \"%s --help\" for more information.\n",
progname);
diff --git a/src/backend/main/main.c b/src/backend/main/main.c
index e8effe5024..ff252dffbd 100644
--- a/src/backend/main/main.c
+++ b/src/backend/main/main.c
@@ -426,12 +426,18 @@ help(const char *progname)
printf(_(" -E echo statement before execution\n"));
printf(_(" -j do not use newline as interactive query delimiter\n"));
printf(_(" -r FILENAME send stdout and stderr to given file\n"));
+ printf(_(" -m START_MXID set initial database cluster multixact id\n"));
+ printf(_(" -o START_MXOFF set initial database cluster multixact offset\n"));
+ printf(_(" -x START_XID set initial database cluster xid\n"));
printf(_("\nOptions for bootstrapping mode:\n"));
printf(_(" --boot selects bootstrapping mode (must be first argument)\n"));
printf(_(" --check selects check mode (must be first argument)\n"));
printf(_(" DBNAME database name (mandatory argument in bootstrapping mode)\n"));
printf(_(" -r FILENAME send stdout and stderr to given file\n"));
+ printf(_(" -m START_MXID set initial database cluster multixact id\n"));
+ printf(_(" -o START_MXOFF set initial database cluster multixact offset\n"));
+ printf(_(" -x START_XID set initial database cluster xid\n"));
printf(_("\nPlease read the documentation for the complete list of run-time\n"
"configuration settings and how to set them on the command line or in\n"
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index bb22b13ade..028a734517 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -585,7 +585,7 @@ PostmasterMain(int argc, char *argv[])
* tcop/postgres.c (the option sets should not conflict) and with the
* common help() function in main/main.c.
*/
- while ((opt = getopt(argc, argv, "B:bC:c:D:d:EeFf:h:ijk:lN:OPp:r:S:sTt:W:-:")) != -1)
+ while ((opt = getopt(argc, argv, "B:bC:c:D:d:EeFf:h:ijk:lm:N:Oo:Pp:r:S:sTt:W:x:-:")) != -1)
{
switch (opt)
{
@@ -695,10 +695,18 @@ PostmasterMain(int argc, char *argv[])
SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
break;
+ case 'm':
+ /* only used by single-user backend */
+ break;
+
case 'O':
SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
break;
+ case 'o':
+ /* only used by single-user backend */
+ break;
+
case 'P':
SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
break;
@@ -749,6 +757,10 @@ PostmasterMain(int argc, char *argv[])
SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
break;
+ case 'x':
+ /* only used by single-user backend */
+ break;
+
default:
write_stderr("Try \"%s --help\" for more information.\n",
progname);
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 5655348a2e..9c170f4906 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3788,7 +3788,7 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx,
* postmaster/postmaster.c (the option sets should not conflict) and with
* the common help() function in main/main.c.
*/
- while ((flag = getopt(argc, argv, "B:bC:c:D:d:EeFf:h:ijk:lN:nOPp:r:S:sTt:v:W:-:")) != -1)
+ while ((flag = getopt(argc, argv, "B:bC:c:D:d:EeFf:h:ijk:lm:N:nOo:Pp:r:S:sTt:v:W:x:-:")) != -1)
{
switch (flag)
{
@@ -3893,6 +3893,23 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx,
SetConfigOption("ssl", "true", ctx, gucsource);
break;
+ case 'm':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_mxid = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartMultiXactIdIsValid(start_mxid))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid initial database cluster multixact id")));
+ }
+ }
+ break;
+
case 'N':
SetConfigOption("max_connections", optarg, ctx, gucsource);
break;
@@ -3905,6 +3922,23 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx,
SetConfigOption("allow_system_table_mods", "true", ctx, gucsource);
break;
+ case 'o':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_mxoff = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartMultiXactOffsetIsValid(start_mxoff))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid initial database cluster multixact offset")));
+ }
+ }
+ break;
+
case 'P':
SetConfigOption("ignore_system_indexes", "true", ctx, gucsource);
break;
@@ -3959,6 +3993,23 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx,
SetConfigOption("post_auth_delay", optarg, ctx, gucsource);
break;
+ case 'x':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_xid = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartTransactionIdIsValid(start_xid))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid initial database cluster xid")));
+ }
+ }
+ break;
+
default:
errs++;
break;
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 759672a9b9..125bfb6736 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -168,6 +168,9 @@ static bool data_checksums = true;
static char *xlog_dir = NULL;
static int wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024);
static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
+static TransactionId start_xid = 0;
+static MultiXactId start_mxid = 0;
+static MultiXactOffset start_mxoff = 0;
/* internal vars */
@@ -1596,6 +1599,11 @@ bootstrap_template1(void)
bki_lines = replace_token(bki_lines, "POSTGRES",
escape_quotes_bki(username));
+ /* relfrozenxid must not be less than FirstNormalTransactionId */
+ sprintf(buf, "%llu", (unsigned long long) Max(start_xid, 3));
+ bki_lines = replace_token(bki_lines, "RECENTXMIN",
+ buf);
+
bki_lines = replace_token(bki_lines, "ENCODING",
encodingid_to_string(encodingid));
@@ -1621,6 +1629,9 @@ bootstrap_template1(void)
printfPQExpBuffer(&cmd, "\"%s\" --boot %s %s", backend_exec, boot_options, extra_options);
appendPQExpBuffer(&cmd, " -X %d", wal_segment_size_mb * (1024 * 1024));
+ appendPQExpBuffer(&cmd, " -m %llu", (unsigned long long) start_mxid);
+ appendPQExpBuffer(&cmd, " -o %llu", (unsigned long long) start_mxoff);
+ appendPQExpBuffer(&cmd, " -x %llu", (unsigned long long) start_xid);
if (data_checksums)
appendPQExpBuffer(&cmd, " -k");
if (debug)
@@ -2562,12 +2573,20 @@ usage(const char *progname)
printf(_(" -d, --debug generate lots of debugging output\n"));
printf(_(" --discard-caches set debug_discard_caches=1\n"));
printf(_(" -L DIRECTORY where to find the input files\n"));
+ printf(_(" -m, --multixact-id=START_MXID\n"
+ " set initial database cluster multixact id\n"
+ " max value is 2^62-1\n"));
printf(_(" -n, --no-clean do not clean up after errors\n"));
printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n"));
printf(_(" --no-instructions do not print instructions for next steps\n"));
+ printf(_(" -o, --multixact-offset=START_MXOFF\n"
+ " set initial database cluster multixact offset\n"
+ " max value is 2^62-1\n"));
printf(_(" -s, --show show internal settings, then exit\n"));
printf(_(" --sync-method=METHOD set method for syncing files to disk\n"));
printf(_(" -S, --sync-only only sync database files to disk, then exit\n"));
+ printf(_(" -x, --xid=START_XID set initial database cluster xid\n"
+ " max value is 2^62-1\n"));
printf(_("\nOther options:\n"));
printf(_(" -V, --version output version information, then exit\n"));
printf(_(" -?, --help show this help, then exit\n"));
@@ -3102,6 +3121,18 @@ initialize_data_directory(void)
/* Now create all the text config files */
setup_config();
+ if (start_mxid != 0)
+ printf(_("selecting initial multixact id ... %llu\n"),
+ (unsigned long long) start_mxid);
+
+ if (start_mxoff != 0)
+ printf(_("selecting initial multixact offset ... %llu\n"),
+ (unsigned long long) start_mxoff);
+
+ if (start_xid != 0)
+ printf(_("selecting initial xid ... %llu\n"),
+ (unsigned long long) start_xid);
+
/* Bootstrap template1 */
bootstrap_template1();
@@ -3118,8 +3149,12 @@ initialize_data_directory(void)
fflush(stdout);
initPQExpBuffer(&cmd);
- printfPQExpBuffer(&cmd, "\"%s\" %s %s template1 >%s",
- backend_exec, backend_options, extra_options, DEVNULL);
+ printfPQExpBuffer(&cmd, "\"%s\" %s %s",
+ backend_exec, backend_options, extra_options);
+ appendPQExpBuffer(&cmd, " -m %llu", (unsigned long long) start_mxid);
+ appendPQExpBuffer(&cmd, " -o %llu", (unsigned long long) start_mxoff);
+ appendPQExpBuffer(&cmd, " -x %llu", (unsigned long long) start_xid);
+ appendPQExpBuffer(&cmd, " template1 >%s", DEVNULL);
PG_CMD_OPEN(cmd.data);
@@ -3206,6 +3241,9 @@ main(int argc, char *argv[])
{"icu-rules", required_argument, NULL, 18},
{"sync-method", required_argument, NULL, 19},
{"no-data-checksums", no_argument, NULL, 20},
+ {"xid", required_argument, NULL, 'x'},
+ {"multixact-id", required_argument, NULL, 'm'},
+ {"multixact-offset", required_argument, NULL, 'o'},
{NULL, 0, NULL, 0}
};
@@ -3247,7 +3285,7 @@ main(int argc, char *argv[])
/* process command-line options */
- while ((c = getopt_long(argc, argv, "A:c:dD:E:gkL:nNsST:U:WX:",
+ while ((c = getopt_long(argc, argv, "A:c:dD:E:gkL:m:nNo:sST:U:Wx:X:",
long_options, &option_index)) != -1)
{
switch (c)
@@ -3305,6 +3343,30 @@ main(int argc, char *argv[])
debug = true;
printf(_("Running in debug mode.\n"));
break;
+ case 'm':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_mxid = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartMultiXactIdIsValid(start_mxid))
+ {
+ pg_log_error("invalid initial database cluster multixact id");
+ exit(1);
+ }
+ else if (start_mxid < 1) /* FirstMultiXactId */
+ {
+ /*
+ * We avoid mxid to be silently set to
+ * FirstMultiXactId, though it does not harm.
+ */
+ pg_log_error("multixact id should be greater than 0");
+ exit(1);
+ }
+ }
+ break;
case 'n':
noclean = true;
printf(_("Running in no-clean mode. Mistakes will not be cleaned up.\n"));
@@ -3312,6 +3374,21 @@ main(int argc, char *argv[])
case 'N':
do_sync = false;
break;
+ case 'o':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_mxoff = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartMultiXactOffsetIsValid(start_mxoff))
+ {
+ pg_log_error("invalid initial database cluster multixact offset");
+ exit(1);
+ }
+ }
+ break;
case 'S':
sync_only = true;
break;
@@ -3400,6 +3477,30 @@ main(int argc, char *argv[])
case 20:
data_checksums = false;
break;
+ case 'x':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_xid = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartTransactionIdIsValid(start_xid))
+ {
+ pg_log_error("invalid value for initial database cluster xid");
+ exit(1);
+ }
+ else if (start_xid < 3) /* FirstNormalTransactionId */
+ {
+ /*
+ * We avoid xid to be silently set to
+ * FirstNormalTransactionId, though it does not harm.
+ */
+ pg_log_error("xid should be greater than 2");
+ exit(1);
+ }
+ }
+ break;
default:
/* getopt_long already emitted a complaint */
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index 01cc4a1602..8b017eb907 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -329,4 +329,64 @@ command_fails(
[ 'pg_checksums', '--pgdata' => $datadir_nochecksums ],
"pg_checksums fails with data checksum disabled");
+# Set non-standard initial mxid/mxoff/xid.
+command_fails_like(
+ [ 'initdb', '-m', 'seven', $datadir ],
+ qr/initdb: error: invalid initial database cluster multixact id/,
+ 'fails for invalid initial database cluster multixact id');
+command_fails_like(
+ [ 'initdb', '-o', 'seven', $datadir ],
+ qr/initdb: error: invalid initial database cluster multixact offset/,
+ 'fails for invalid initial database cluster multixact offset');
+command_fails_like(
+ [ 'initdb', '-x', 'seven', $datadir ],
+ qr/initdb: error: invalid value for initial database cluster xid/,
+ 'fails for invalid initial database cluster xid');
+
+command_checks_all(
+ [ 'initdb', '-m', '65535', "$tempdir/data-m65535" ],
+ 0,
+ [qr/selecting initial multixact id ... 65535/],
+ [],
+ 'selecting initial multixact id');
+command_checks_all(
+ [ 'initdb', '-o', '65535', "$tempdir/data-o65535" ],
+ 0,
+ [qr/selecting initial multixact offset ... 65535/],
+ [],
+ 'selecting initial multixact offset');
+command_checks_all(
+ [ 'initdb', '-x', '65535', "$tempdir/data-x65535" ],
+ 0,
+ [qr/selecting initial xid ... 65535/],
+ [],
+ 'selecting initial xid');
+
+# Setup new cluster with given mxid/mxoff/xid.
+my $node;
+my $result;
+
+$node = PostgreSQL::Test::Cluster->new('test-mxid');
+$node->init(extra => ['-m', '16777215']); # 0xFFFFFF
+$node->start;
+$result = $node->safe_psql('postgres', "SELECT next_multixact_id FROM pg_control_checkpoint();");
+ok($result >= 16777215, 'setup cluster with given mxid');
+$node->stop;
+
+$node = PostgreSQL::Test::Cluster->new('test-mxoff');
+$node->init(extra => ['-o', '16777215']); # 0xFFFFFF
+$node->start;
+$result = $node->safe_psql('postgres', "SELECT next_multi_offset FROM pg_control_checkpoint();");
+ok($result >= 16777215, 'setup cluster with given mxoff');
+$node->stop;
+
+$node = PostgreSQL::Test::Cluster->new('test-xid');
+$node->init(extra => ['-x', '16777215']); # 0xFFFFFF
+$node->start;
+$result = $node->safe_psql('postgres', "SELECT txid_current();");
+ok($result >= 16777215, 'setup cluster with given xid - check 1');
+$result = $node->safe_psql('postgres', "SELECT oldest_xid FROM pg_control_checkpoint();");
+ok($result >= 16777215, 'setup cluster with given xid - check 2');
+$node->stop;
+
done_testing();
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 4411c1468a..8eb34846da 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -94,6 +94,9 @@ typedef enum RecoveryState
} RecoveryState;
extern PGDLLIMPORT int wal_level;
+extern PGDLLIMPORT TransactionId start_xid;
+extern PGDLLIMPORT MultiXactId start_mxid;
+extern PGDLLIMPORT MultiXactOffset start_mxoff;
/* Is WAL archiving enabled (always or only while server is running normally)? */
#define XLogArchivingActive() \
diff --git a/src/include/c.h b/src/include/c.h
index 318194f78d..4f2b5432e5 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -622,6 +622,10 @@ typedef uint64 MultiXactOffset;
typedef uint32 CommandId;
+#define StartTransactionIdIsValid(xid) ((xid) <= 0xFFFFFFFF)
+#define StartMultiXactIdIsValid(mxid) ((mxid) <= 0xFFFFFFFF)
+#define StartMultiXactOffsetIsValid(offset) ((offset) <= 0xFFFFFFFF)
+
#define FirstCommandId ((CommandId) 0)
#define InvalidCommandId (~(CommandId)0)
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index f0d612ca48..5c63290a72 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -123,7 +123,7 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat
Oid relrewrite BKI_DEFAULT(0) BKI_LOOKUP_OPT(pg_class);
/* all Xids < this are frozen in this rel */
- TransactionId relfrozenxid BKI_DEFAULT(3); /* FirstNormalTransactionId */
+ TransactionId relfrozenxid BKI_DEFAULT(RECENTXMIN); /* FirstNormalTransactionId */
/* all multixacts in this rel are >= this; it is really a MultiXactId */
TransactionId relminmxid BKI_DEFAULT(1); /* FirstMultiXactId */
--
2.43.0
From 4a2e64b44cf8ec22a264d8fa495432f535482fb4 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <[email protected]>
Date: Fri, 27 Dec 2024 19:39:58 +0300
Subject: [PATCH v13 6/7] TEST: try to replicate buggy oldest offset
---
src/bin/pg_upgrade/t/005_offset.pl | 59 ++++++++++++++++++++++++++++++
1 file changed, 59 insertions(+)
diff --git a/src/bin/pg_upgrade/t/005_offset.pl b/src/bin/pg_upgrade/t/005_offset.pl
index df84186de4..2d91d101fa 100644
--- a/src/bin/pg_upgrade/t/005_offset.pl
+++ b/src/bin/pg_upgrade/t/005_offset.pl
@@ -305,6 +305,8 @@ my @TESTS = (
100, 101, 102, 103, 104, 105, 106,
# self upgrade
1000,
+ # buggy
+ 2000
);
# =============================================================================
@@ -560,4 +562,61 @@ SKIP:
ok(1, "TEST $TEST_NO PASSED");
}
+# =============================================================================
+# Buggy
+# =============================================================================
+
+SKIP:
+{
+ my $TEST_NO = 2000;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ if (!defined($ENV{oldinstall}))
+ {
+ die "oldinstall is not defined";
+ }
+
+ my $dbname = 'buggy';
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ my $oldnode = PostgreSQL::Test::Cluster->new("old_$dbname",
+ install_path => $ENV{oldinstall});
+ $newnode->init;
+ $oldnode->init(force_initdb => 1);
+
+ $oldnode->append_conf('postgresql.conf', q(
+ autovacuum = off
+ max_prepared_transactions = 2
+ fsync = off
+ ));
+ $oldnode->start;
+
+ mxid_gen2($oldnode, 'FOO', 1.25);
+ mxid_gen2($oldnode, 'BAR', 1.25);
+
+ $oldnode->safe_psql('postgres', q(
+ DROP TABLE BAR;
+ CHECKPOINT;
+ ));
+ $oldnode->stop;
+
+ unlink($oldnode->data_dir . "/pg_multixact/offsets/0000");
+
+ command_ok(
+ [
+ 'pg_upgrade', '--no-sync',
+ '-d', $oldnode->data_dir,
+ '-D', $newnode->data_dir,
+ '-b', $oldnode->config_data('--bindir'),
+ '-B', $newnode->config_data('--bindir'),
+ '-s', $newnode->host,
+ '-p', $oldnode->port,
+ '-P', $newnode->port,
+ '--copy'
+ ],
+ 'run of pg_upgrade');
+
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
done_testing();
--
2.43.0
From 52b7019b4b964bd221de2c891d2b1f073b7465bf Mon Sep 17 00:00:00 2001
From: Maxim Orlov <[email protected]>
Date: Wed, 13 Nov 2024 16:34:34 +0300
Subject: [PATCH v13 7/7] TEST: bump catver
---
src/bin/pg_upgrade/pg_upgrade.h | 2 +-
src/include/catalog/catversion.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 9b3d645b08..0fd791c442 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -119,7 +119,7 @@ extern char *output_files[];
*
* XXX: should be changed to the actual CATALOG_VERSION_NO on commit.
*/
-#define MULTIXACTOFFSET_FORMATCHANGE_CAT_VER 202409041
+#define MULTIXACTOFFSET_FORMATCHANGE_CAT_VER 202501283
/*
* large object chunk size added to pg_controldata,
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 6edaa20368..dfcb940501 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202501282
+#define CATALOG_VERSION_NO 202501283
#endif
--
2.43.0
Attachments:
[text/plain] v13-0005-TEST-add-src-bin-pg_upgrade-t-005_offset.pl.patch.txt (13.5K, 3-v13-0005-TEST-add-src-bin-pg_upgrade-t-005_offset.pl.patch.txt)
download | inline diff:
From a989c58abfabb07c8778de339d617690f6654f79 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <[email protected]>
Date: Tue, 19 Nov 2024 17:08:10 +0300
Subject: [PATCH v13 5/7] TEST: add src/bin/pg_upgrade/t/005_offset.pl
---
src/bin/pg_upgrade/t/005_offset.pl | 563 +++++++++++++++++++++++++++++
1 file changed, 563 insertions(+)
create mode 100644 src/bin/pg_upgrade/t/005_offset.pl
diff --git a/src/bin/pg_upgrade/t/005_offset.pl b/src/bin/pg_upgrade/t/005_offset.pl
new file mode 100644
index 0000000000..df84186de4
--- /dev/null
+++ b/src/bin/pg_upgrade/t/005_offset.pl
@@ -0,0 +1,563 @@
+# Copyright (c) 2024, PostgreSQL Global Development Group
+
+use strict;
+use warnings FATAL => 'all';
+
+use File::Find qw(find);
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# This pair of calls will create significantly more member segments than offset
+# segments.
+sub prep
+{
+ my $node = shift;
+ my $tbl = shift;
+
+ $node->safe_psql('postgres',
+ "CREATE TABLE ${tbl} (I INT PRIMARY KEY, N_UPDATED INT) " .
+ " WITH (AUTOVACUUM_ENABLED=FALSE);" .
+ "INSERT INTO ${tbl} SELECT G, 0 FROM GENERATE_SERIES(1, 50) G;");
+}
+
+sub fill
+{
+	my $node = shift;
+	my $tbl = shift;
+	# Drive $tbl from many sessions: mostly FOR KEY SHARE, occasionally UPDATE.
+	my $nclients = 50;        # number of concurrent background sessions
+	my $update_every = 90;    # every N-th iteration issues an UPDATE
+	my @connections = ();
+	# Open exactly $nclients sessions, each left inside an open transaction.
+	for (1 .. $nclients)
+	{
+		my $conn = $node->background_psql('postgres');
+		$conn->query_safe("BEGIN");
+
+		push(@connections, $conn);
+	}
+	# Round-robin over the sessions: end the previous transaction, start a new one.
+	for (my $i = 0; $i < 20000; $i++)
+	{
+		my $conn = $connections[$i % $nclients];
+
+		$conn->query_safe("COMMIT;");
+		$conn->query_safe("BEGIN");
+		# NOTE(review): ${i} % 50 can be 0; confirm a row with I = 0 is expected.
+		if ($i % $update_every == 0)
+		{
+			$conn->query_safe(
+				"UPDATE ${tbl} SET " .
+				"N_UPDATED = N_UPDATED + 1 " .
+				"WHERE I = ${i} % 50");
+		}
+		else
+		{
+			$conn->query_safe(
+				"SELECT * FROM ${tbl} FOR KEY SHARE");
+		}
+	}
+	# Shut down all background sessions.
+	for my $conn (@connections)
+	{
+		$conn->quit();
+	}
+}
+
+# This pair of calls will create more or less the same number of member and
+# offset segments.
+sub prep2
+{
+ my $node = shift;
+ my $tbl = shift;
+
+ $node->safe_psql('postgres',
+ "CREATE TABLE ${tbl}(BAR INT PRIMARY KEY, BAZ INT); " .
+ "CREATE OR REPLACE PROCEDURE MXIDFILLER(N_STEPS INT DEFAULT 1000) " .
+ "LANGUAGE PLPGSQL " .
+ "AS \$\$ " .
+ "BEGIN " .
+ " FOR I IN 1..N_STEPS LOOP " .
+ " UPDATE ${tbl} SET BAZ = RANDOM(1, 1000) " .
+ " WHERE BAR IN (SELECT BAR FROM ${tbl} " .
+ " TABLESAMPLE BERNOULLI(80)); " .
+ " COMMIT; " .
+ " END LOOP; " .
+ "END; \$\$; " .
+ "INSERT INTO ${tbl} (BAR, BAZ) " .
+ "SELECT ID, ID FROM GENERATE_SERIES(1, 1024) ID;");
+}
+
+sub fill2
+{
+ my $node = shift;
+ my $tbl = shift;
+ my $scale = shift // 1;
+
+ $node->safe_psql('postgres',
+ "BEGIN; " .
+ "SELECT * FROM ${tbl} FOR KEY SHARE; " .
+ "PREPARE TRANSACTION 'A'; " .
+ "CALL MXIDFILLER((365 * ${scale})::int); " .
+ "COMMIT PREPARED 'A';");
+}
+
+
+# generate around 2 offset segments and 55 member segments
+sub mxid_gen1
+{
+ my $node = shift;
+ my $tbl = shift;
+
+ prep($node, $tbl);
+ fill($node, $tbl);
+
+ $node->safe_psql('postgres', q(CHECKPOINT));
+}
+
+# generate around 10 offset segments and 12 member segments
+sub mxid_gen2
+{
+ my $node = shift;
+ my $tbl = shift;
+ my $scale = shift // 1;
+
+ prep2($node, $tbl);
+ fill2($node, $tbl, $scale);
+
+ $node->safe_psql('postgres', q(CHECKPOINT));
+}
+
+# Fetch latest multixact checkpoint values.
+sub multi_bounds
+{
+	my ($node) = @_;
+	my $path = $node->config_data('--bindir');
+	my ($stdout, $stderr) = run_command([
+		$path . '/pg_controldata',
+		$node->data_dir
+	]);
+	my @control_data = split("\n", $stdout);
+	my $next = undef;
+	my $oldest = undef;
+	my $next_offset = undef;
+	# Scan the pg_controldata output line by line; stop once all three are found.
+	foreach my $line (@control_data)
+	{
+		if ($line =~ /^Latest checkpoint's NextMultiXactId:\s*(.*)$/)
+		{
+			$next = $1;
+			note ">>> @ node ". $node->name . ", " . $line;
+		}
+
+		if ($line =~ /^Latest checkpoint's oldestMultiXid:\s*(.*)$/)
+		{
+			$oldest = $1;
+			note ">>> @ node ". $node->name . ", " . $line;
+		}
+
+		if ($line =~ /^Latest checkpoint's NextMultiOffset:\s*(.*)$/)
+		{
+			$next_offset = $1;
+			note ">>> @ node ". $node->name . ", " . $line;
+		}
+
+		if (defined($oldest) && defined($next) && defined($next_offset))
+		{
+			last;
+		}
+	}
+	# All three values are mandatory: die if any of them was not reported.
+	die "Latest checkpoint's NextMultiXactId not found in control file!\n"
+	  unless defined($next);
+
+	die "Latest checkpoint's oldestMultiXid not found in control file!\n"
+	  unless defined($oldest);
+
+	die "Latest checkpoint's NextMultiOffset not found in control file!\n"
+	  unless defined($next_offset);
+	# Returned in the order (oldest multixact, next multixact, next offset).
+	return ($oldest, $next, $next_offset);
+}
+
+# Create node from existing bins.
+sub create_new_node
+{
+ my ($name, %params) = @_;
+
+ create_node(0, @_);
+}
+
+# Create node from ENV oldinstall
+sub create_old_node
+{
+ my ($name, %params) = @_;
+
+ if (!defined($ENV{oldinstall}))
+ {
+ die "oldinstall is not defined";
+ }
+
+ create_node(1, @_);
+}
+
+sub create_node
+{
+	my ($install_path_from_env, $name, %params) = @_;
+	my $scale = $params{scale} // 1;
+	my $multi = $params{multi};
+	my $offset = $params{offset};
+	# Build cluster $name, run the multixact workload on it, return it stopped.
+	# A true $install_path_from_env passes install_path => $ENV{oldinstall}.
+	my @new_args = ($name);
+	push(@new_args, install_path => $ENV{oldinstall}) if $install_path_from_env;
+	my $node = PostgreSQL::Test::Cluster->new(@new_args);
+	# Pass -m / -o to initdb only when a (true) starting value was requested.
+	$node->init(force_initdb => 1,
+		extra => [
+			$multi ? ('-m', $multi) : (),
+			$offset ? ('-o', $offset) : (),
+			('-k'),
+		]);
+
+	# Fixup MOX patch quirk
+	if ($multi)
+	{
+		unlink $node->data_dir . '/pg_multixact/offsets/0000';
+	}
+	if ($offset)
+	{
+		unlink $node->data_dir . '/pg_multixact/members/0000';
+	}
+
+	# append_conf() takes (file name, text); both settings go to postgresql.conf.
+	$node->append_conf('postgresql.conf', 'fsync = off');
+	$node->append_conf('postgresql.conf', 'max_prepared_transactions = 2');
+
+	$node->start();
+	mxid_gen2($node, 'FOO', $scale);
+	mxid_gen1($node, 'BAR', $scale);
+	$node->restart();
+	$node->safe_psql('postgres', q(SELECT * FROM FOO));	# just in case...
+	$node->safe_psql('postgres', q(SELECT * FROM BAR));
+	$node->safe_psql('postgres', q(CHECKPOINT));
+	$node->stop();
+
+	return $node;
+}
+
+sub do_upgrade
+{
+ my ($oldnode, $newnode) = @_;
+
+ command_ok(
+ [
+ 'pg_upgrade', '--no-sync',
+ '-d', $oldnode->data_dir,
+ '-D', $newnode->data_dir,
+ '-b', $oldnode->config_data('--bindir'),
+ '-B', $newnode->config_data('--bindir'),
+ '-s', $newnode->host,
+ '-p', $oldnode->port,
+ '-P', $newnode->port,
+ '--check'
+ ],
+ 'run of pg_upgrade');
+
+ command_ok(
+ [
+ 'pg_upgrade', '--no-sync',
+ '-d', $oldnode->data_dir,
+ '-D', $newnode->data_dir,
+ '-b', $oldnode->config_data('--bindir'),
+ '-B', $newnode->config_data('--bindir'),
+ '-s', $newnode->host,
+ '-p', $oldnode->port,
+ '-P', $newnode->port,
+ '--copy'
+ ],
+ 'run of pg_upgrade');
+
+ $oldnode->start();
+ $newnode->start();
+
+ my $oldfoo = $oldnode->safe_psql('postgres', q(SELECT * FROM FOO));
+ my $newfoo = $newnode->safe_psql('postgres', q(SELECT * FROM FOO));
+ is($oldfoo, $newfoo, "select foo eq");
+
+ my $oldbar = $oldnode->safe_psql('postgres', q(SELECT * FROM BAR));
+ my $newbar = $newnode->safe_psql('postgres', q(SELECT * FROM BAR));
+ is($oldbar, $newbar, "select bar eq");
+
+ $oldnode->stop();
+ $newnode->stop();
+
+ multi_bounds($oldnode);
+ multi_bounds($newnode);
+}
+
+my @TESTS = (
+ # tests without ENV oldinstall
+ 0, 1, 2, 3, 4, 5, 6,
+ # tests with "real" pg_upgrade
+ 100, 101, 102, 103, 104, 105, 106,
+ # self upgrade
+ 1000,
+);
+
+# =============================================================================
+# Basic sanity tests on a NEW bin
+# =============================================================================
+
+# starts from the zero
+SKIP:
+{
+ my $TEST_NO = 0;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $node = create_new_node('simple_mo',
+ scale => 1);
+ multi_bounds($node);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# multi starts from the value
+SKIP:
+{
+ my $TEST_NO = 1;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $node = create_new_node('simple_Mo',
+ scale => 1.15,
+ multi => '0x123400');
+ multi_bounds($node);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# offsets starts from the value
+SKIP:
+{
+ my $TEST_NO = 2;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $node = create_new_node('simple_mO',
+ scale => 1.15,
+ offset => '0x432100');
+ multi_bounds($node);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# multi and offsets starts from the value
+SKIP:
+{
+ my $TEST_NO = 3;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $node = create_new_node('simple_MO',
+ scale => 1.15,
+ multi => '0xDEAD00', offset => '0xBEEF00');
+ multi_bounds($node);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# multi starts from the value, multi wrap
+SKIP:
+{
+ my $TEST_NO = 4;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $node = create_new_node('simple_Mo_wrap',
+ scale => 1.15,
+ multi => '0xFFFF7000');
+ multi_bounds($node);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# offsets starts from the value, offsets wrap
+SKIP:
+{
+ my $TEST_NO = 5;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $node = create_new_node('simple_mO_wrap',
+ scale => 1.15,
+ offset => '0xFFFFFC00');
+ multi_bounds($node);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# multi starts from the value, offsets starts from the value,
+# multi wrap, offsets wrap
+SKIP:
+{
+ my $TEST_NO = 6;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $node = create_new_node('simple_MO_wrap',
+ scale => 1.15,
+ multi => '0xFFFF7000', offset => '0xFFFFFC00');
+ multi_bounds($node);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# =============================================================================
+# pg_upgrade tests
+# =============================================================================
+
+# starts from the zero
+SKIP:
+{
+ my $TEST_NO = 100;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $dbname = 'mo';
+ my $oldnode = create_old_node("old_$dbname",
+ scale => 1);
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ $newnode->init();
+
+ do_upgrade($oldnode, $newnode);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# multi starts from the value
+SKIP:
+{
+ my $TEST_NO = 101;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $dbname = 'Mo';
+ my $oldnode = create_old_node("old_$dbname",
+ scale => 1.2,
+ multi => '0x123400');
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ $newnode->init();
+
+ do_upgrade($oldnode, $newnode);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# offsets starts from the value
+SKIP:
+{
+ my $TEST_NO = 102;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $dbname = 'mO';
+ my $oldnode = create_old_node("old_$dbname",
+ scale => 1.2,
+ offset => '0x432100');
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ $newnode->init();
+
+ do_upgrade($oldnode, $newnode);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# multi and offsets starts from the value
+SKIP:
+{
+ my $TEST_NO = 103;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $dbname = 'MO';
+ my $oldnode = create_old_node("old_$dbname",
+ scale => 1.2,
+ multi => '0xDEAD00', offset => '0xBEEF00');
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ $newnode->init();
+
+ do_upgrade($oldnode, $newnode);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# multi starts from the value, multi wrap
+SKIP:
+{
+ my $TEST_NO = 104;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $dbname = 'Mo_wrap';
+ my $oldnode = create_old_node("old_$dbname",
+ scale => 1.2,
+ multi => '0xFFFF7000');
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ $newnode->init();
+
+ do_upgrade($oldnode, $newnode);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# offsets starts from the value, offsets wrap
+SKIP:
+{
+ my $TEST_NO = 105;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $dbname = 'mO_wrap';
+ my $oldnode = create_old_node("old_$dbname",
+ scale => 1.2,
+ offset => '0xFFFFFC00');
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ $newnode->init();
+
+ do_upgrade($oldnode, $newnode);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# multi starts from the value, offsets starts from the value,
+# multi wrap, offsets wrap
+SKIP:
+{
+ my $TEST_NO = 106;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $dbname = 'MO_wrap';
+ my $oldnode = create_old_node("old_$dbname",
+ scale => 1.2,
+ multi => '0xFFFF7000', offset => '0xFFFFFC00');
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ $newnode->init();
+
+ do_upgrade($oldnode, $newnode);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+# =============================================================================
+# Self upgrade
+# =============================================================================
+
+# starts from the zero
+SKIP:
+{
+ my $TEST_NO = 1000;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ my $dbname = 'self_upgrade';
+ my $oldnode = create_new_node("old_$dbname",
+ scale => 1);
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ $newnode->init();
+
+ do_upgrade($oldnode, $newnode);
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
+done_testing();
--
2.43.0
[application/octet-stream] v13-0003-Make-pg_upgrade-convert-multixact-offsets.patch (31.3K, 4-v13-0003-Make-pg_upgrade-convert-multixact-offsets.patch)
download | inline diff:
From e15f89143dd8aef70957e87d59c177fab66f9ce2 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <[email protected]>
Date: Tue, 13 Aug 2024 14:44:50 +0300
Subject: [PATCH v13 3/7] Make pg_upgrade convert multixact offsets.
Author: Heikki Linnakangas <[email protected]>
Author: Maxim Orlov <[email protected]>
Author: Yura Sokolov <[email protected]>
---
src/backend/access/transam/multixact.c | 35 +--
src/bin/pg_upgrade/Makefile | 3 +
src/bin/pg_upgrade/meson.build | 3 +
src/bin/pg_upgrade/multixact_old.c | 338 +++++++++++++++++++++++++
src/bin/pg_upgrade/multixact_old.h | 12 +
src/bin/pg_upgrade/multixact_rewrite.c | 238 +++++++++++++++++
src/bin/pg_upgrade/pg_upgrade.c | 29 ++-
src/bin/pg_upgrade/pg_upgrade.h | 13 +-
src/bin/pg_upgrade/slru_io.c | 211 +++++++++++++++
src/bin/pg_upgrade/slru_io.h | 23 ++
10 files changed, 873 insertions(+), 32 deletions(-)
create mode 100644 src/bin/pg_upgrade/multixact_old.c
create mode 100644 src/bin/pg_upgrade/multixact_old.h
create mode 100644 src/bin/pg_upgrade/multixact_rewrite.c
create mode 100644 src/bin/pg_upgrade/slru_io.c
create mode 100644 src/bin/pg_upgrade/slru_io.h
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index cd9db52e95..d63ae17330 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1103,7 +1103,6 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
MultiXactOffset *offptr;
MultiXactOffset offset;
int length;
- int truelength;
MultiXactId oldestMXact;
MultiXactId nextMXact;
MultiXactId tmpMXact;
@@ -1202,15 +1201,6 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
* we have just for this; the process in charge will signal the CV as soon
* as it has finished writing the multixact offset.
*
- * 3. Because GetNewMultiXactId increments offset zero to offset one to
- * handle case #2, there is an ambiguity near the point of offset
- * wraparound. If we see next multixact's offset is one, is that our
- * multixact's actual endpoint, or did it end at zero with a subsequent
- * increment? We handle this using the knowledge that if the zero'th
- * member slot wasn't filled, it'll contain zero, and zero isn't a valid
- * transaction ID so it can't be a multixact member. Therefore, if we
- * read a zero from the members array, just ignore it.
- *
* This is all pretty messy, but the mess occurs only in infrequent corner
* cases, so it seems better than holding the MultiXactGenLock for a long
* time on every multixact creation.
@@ -1297,6 +1287,9 @@ retry:
LWLockRelease(lock);
lock = NULL;
+ /* A multixid with zero members should not happen */
+ Assert(length > 0);
+
/*
* If we slept above, clean up state; it's no longer needed.
*/
@@ -1305,7 +1298,6 @@ retry:
ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
- truelength = 0;
prev_pageno = -1;
for (int i = 0; i < length; i++, offset++)
{
@@ -1343,36 +1335,27 @@ retry:
xactptr = (TransactionId *)
(MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
- if (!TransactionIdIsValid(*xactptr))
- {
- /* Corner case 3: we must be looking at unused slot zero */
- Assert(offset == 0);
- continue;
- }
+ Assert(TransactionIdIsValid(*xactptr));
flagsoff = MXOffsetToFlagsOffset(offset);
bshift = MXOffsetToFlagsBitShift(offset);
flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
- ptr[truelength].xid = *xactptr;
- ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
- truelength++;
+ ptr[i].xid = *xactptr;
+ ptr[i].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
}
LWLockRelease(lock);
- /* A multixid with zero members should not happen */
- Assert(truelength > 0);
-
/*
* Copy the result into the local cache.
*/
- mXactCachePut(multi, truelength, ptr);
+ mXactCachePut(multi, length, ptr);
debug_elog3(DEBUG2, "GetMembers: no cache for %s",
- mxid_to_string(multi, truelength, ptr));
+ mxid_to_string(multi, length, ptr));
*members = ptr;
- return truelength;
+ return length;
}
/*
diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile
index f83d2b5d30..b4ad01c00b 100644
--- a/src/bin/pg_upgrade/Makefile
+++ b/src/bin/pg_upgrade/Makefile
@@ -19,11 +19,14 @@ OBJS = \
file.o \
function.o \
info.o \
+ multixact_old.o \
+ multixact_rewrite.o \
option.o \
parallel.o \
pg_upgrade.o \
relfilenumber.o \
server.o \
+ slru_io.o \
tablespace.o \
task.o \
util.o \
diff --git a/src/bin/pg_upgrade/meson.build b/src/bin/pg_upgrade/meson.build
index cc2ba97d9a..76c8f2005d 100644
--- a/src/bin/pg_upgrade/meson.build
+++ b/src/bin/pg_upgrade/meson.build
@@ -8,11 +8,14 @@ pg_upgrade_sources = files(
'file.c',
'function.c',
'info.c',
+ 'multixact_old.c',
+ 'multixact_rewrite.c',
'option.c',
'parallel.c',
'pg_upgrade.c',
'relfilenumber.c',
'server.c',
+ 'slru_io.c',
'tablespace.c',
'task.c',
'util.c',
diff --git a/src/bin/pg_upgrade/multixact_old.c b/src/bin/pg_upgrade/multixact_old.c
new file mode 100644
index 0000000000..0442928e89
--- /dev/null
+++ b/src/bin/pg_upgrade/multixact_old.c
@@ -0,0 +1,338 @@
+/*
+ * multixact_old.c
+ *
+ * Support for reading pre-v18 format pg_multixact files
+ *
+ * Copyright (c) 2024, PostgreSQL Global Development Group
+ * src/bin/pg_upgrade/multixact_old.c
+ */
+
+#include "postgres_fe.h"
+
+#include "access/transam.h"
+#include "pg_upgrade.h"
+#include "multixact_old.h"
+#include "slru_io.h"
+
+/*
+ * Below are a bunch of definitions that are copy-pasted from multixact.c from
+ * version 17. They shadow the new definitions in access/multixact.h, so it's
+ * important that we *don't* include that here. That is a big reason this
+ * code has to be in a separate source file.
+ *
+ * All references to MultiXactOffset have been replaced with OldMultiXactOffset.
+ */
+typedef uint32 OldMultiXactOffset;
+
+#define FirstMultiXactId ((MultiXactId) 1)
+
+/*
+ * Possible multixact lock modes ("status"). The first four modes are for
+ * tuple locks (FOR KEY SHARE, FOR SHARE, FOR NO KEY UPDATE, FOR UPDATE); the
+ * next two are used for update and delete modes.
+ */
+typedef enum
+{
+ MultiXactStatusForKeyShare = 0x00, /* tuple lock: FOR KEY SHARE */
+ MultiXactStatusForShare = 0x01, /* tuple lock: FOR SHARE */
+ MultiXactStatusForNoKeyUpdate = 0x02, /* tuple lock: FOR NO KEY UPDATE */
+ MultiXactStatusForUpdate = 0x03, /* tuple lock: FOR UPDATE; highest locking-only value, see ISUPDATE_from_mxstatus() */
+ /* an update that doesn't touch "key" columns */
+ MultiXactStatusNoKeyUpdate = 0x04,
+ /* other updates, and delete */
+ MultiXactStatusUpdate = 0x05,
+} MultiXactStatus;
+
+/* does a status value correspond to a tuple update? */
+#define ISUPDATE_from_mxstatus(status) \
+ ((status) > MultiXactStatusForUpdate)
+
+/*
+ * Defines for OldMultiXactOffset page sizes. A page is the same BLCKSZ as is
+ * used everywhere else in Postgres.
+ *
+ * Note: because OldMultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
+ * MultiXact page numbering also wraps around at
+ * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
+ * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need
+ * take no explicit notice of that fact in this module, except when comparing
+ * segment and page numbers in TruncateMultiXact (see
+ * OldMultiXactOffsetPagePrecedes).
+ */
+
+/* We need four bytes per offset */
+#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(uint32))
+
+/* Number of the old-format offsets page that holds the entry for 'multi'. */
+static inline int64
+MultiXactIdToOffsetPage(MultiXactId multi)
+{
+	int64		pageno = multi / MULTIXACT_OFFSETS_PER_PAGE;
+
+	return pageno;
+}
+
+/* Index of the entry for 'multi' within its old-format offsets page. */
+static inline int
+MultiXactIdToOffsetEntry(MultiXactId multi)
+{
+	int			entryno = multi % MULTIXACT_OFFSETS_PER_PAGE;
+
+	return entryno;
+}
+
+/* Number of the offsets segment that contains the page holding 'multi'. */
+static inline int64
+MultiXactIdToOffsetSegment(MultiXactId multi)
+{
+	int64		pageno = MultiXactIdToOffsetPage(multi);
+
+	return pageno / SLRU_PAGES_PER_SEGMENT;
+}
+
+/*
+ * The situation for members is a bit more complex: we store one byte of
+ * additional flag bits for each TransactionId. To do this without getting
+ * into alignment issues, we store four bytes of flags, and then the
+ * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and
+ * are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups
+ * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and
+ * performance) trumps space efficiency here.
+ *
+ * Note that the "offset" macros work with byte offset, not array indexes, so
+ * arithmetic must be done using "char *" pointers.
+ */
+/* We need eight bits per xact, so one xact fits in a byte */
+#define MXACT_MEMBER_BITS_PER_XACT 8
+#define MXACT_MEMBER_FLAGS_PER_BYTE 1
+#define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
+
+/* how many full bytes of flags are there in a group? */
+#define MULTIXACT_FLAGBYTES_PER_GROUP 4
+#define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
+ (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
+/* size in bytes of a complete group */
+#define MULTIXACT_MEMBERGROUP_SIZE \
+ (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
+#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
+#define MULTIXACT_MEMBERS_PER_PAGE \
+ (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
+
+/* Number of the old-format members page on which member 'offset' is stored */
+static inline int64
+MXOffsetToMemberPage(OldMultiXactOffset offset)
+{
+	int64		pageno = offset / MULTIXACT_MEMBERS_PER_PAGE;
+
+	return pageno;
+}
+
+/*
+ * Byte position, within its page, of the flags word of the member group
+ * that contains member 'offset'.
+ */
+static inline int
+MXOffsetToFlagsOffset(OldMultiXactOffset offset)
+{
+	/* which group, counted over the whole members space */
+	OldMultiXactOffset groupno = offset / MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+
+	/* the same group, counted from the start of its page */
+	int			group_in_page = groupno % MULTIXACT_MEMBERGROUPS_PER_PAGE;
+
+	return group_in_page * MULTIXACT_MEMBERGROUP_SIZE;
+}
+
+/*
+ * Number of bits by which the group's flags word must be shifted right to
+ * bring the flag bits of member 'offset' into the low-order position.
+ */
+static inline int
+MXOffsetToFlagsBitShift(OldMultiXactOffset offset)
+{
+	int			slot = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+
+	return slot * MXACT_MEMBER_BITS_PER_XACT;
+}
+
+/*
+ * Byte position, within its page, of the TransactionId of member 'offset'.
+ *
+ * The XIDs of a group follow the group's flag bytes, so the position is the
+ * start of the group plus the flag bytes plus the member's slot.
+ */
+static inline int
+MXOffsetToMemberOffset(OldMultiXactOffset offset)
+{
+	int			group_start = MXOffsetToFlagsOffset(offset);
+	int			slot = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+
+	return group_start +
+		MULTIXACT_FLAGBYTES_PER_GROUP +
+		slot * sizeof(TransactionId);
+}
+
+typedef struct OldMultiXactReader /* read-side state for the old cluster's pg_multixact files */
+{
+ MultiXactId nextMXact; /* old cluster's next multixact ID, from its control data */
+ uint32 nextOffset; /* old cluster's next member offset, from its control data */
+
+ SlruSegState *offset; /* reader opened on the old pg_multixact/offsets directory */
+ SlruSegState *members; /* reader opened on the old pg_multixact/members directory */
+} OldMultiXactReader;
+
+/*
+ * Allocate a reader for the old cluster's multixact files.
+ *
+ * The next-multixact and next-offset counters are copied from the old
+ * cluster's control data, and SLRU readers are opened on its
+ * pg_multixact/offsets and pg_multixact/members directories.
+ */
+OldMultiXactReader *
+StartOldMultiXactRead(void)
+{
+	OldMultiXactReader *reader = pg_malloc(sizeof(OldMultiXactReader));
+	char	   *offsets_dir;
+	char	   *members_dir;
+
+	reader->nextMXact = old_cluster.controldata.chkpnt_nxtmulti;
+	reader->nextOffset = old_cluster.controldata.chkpnt_nxtmxoff;
+
+	offsets_dir = psprintf("%s/pg_multixact/offsets", old_cluster.pgdata);
+	reader->offset = OpenSlruRead(offsets_dir);
+	pg_free(offsets_dir);
+
+	members_dir = psprintf("%s/pg_multixact/members", old_cluster.pgdata);
+	reader->members = OpenSlruRead(members_dir);
+	pg_free(members_dir);
+
+	return reader;
+}
+
+/*
+ * This is a simplified version of the GetMultiXactIdMembers() server function.
+ *
+ * - Only return the updating member, if any. Upgrade only cares about the updaters.
+ * If there is no updating member, return the first locking-only member. We don't
+ * have any way to represent "no members", but we also don't need to preserve all
+ * the locking members.
+ *
+ * - We don't need to worry about locking and some corner cases because there's
+ * no concurrent activity.
+ *
+ * Parameters:
+ * state - reader created by StartOldMultiXactRead(); supplies the old
+ * cluster's next-multixact/next-offset counters and the two SLRU readers.
+ * multi - the old-format multixact ID to look up.
+ * result - output: XID of the chosen member (the updater if there is one,
+ * otherwise the first valid member encountered).
+ * isupdate - output: true if the returned member has an updating status.
+ *
+ * Exits via pg_fatal() if more than one updating member is found.
+ */
+void
+GetOldMultiXactIdSingleMember(OldMultiXactReader *state, MultiXactId multi,
+ TransactionId *result, bool *isupdate)
+{
+ TransactionId result_xid;
+ bool result_isupdate;
+ int64 pageno;
+ int64 prev_pageno;
+ int entryno;
+ OldMultiXactOffset *offptr;
+ OldMultiXactOffset offset;
+ int length;
+ MultiXactId nextMXact;
+ MultiXactId tmpMXact;
+ OldMultiXactOffset nextOffset;
+ char *buf;
+
+ nextMXact = state->nextMXact;
+ nextOffset = state->nextOffset;
+
+ /*
+ * Find out the offset at which we need to start reading MultiXactMembers
+ * and the number of members in the multixact. We determine the latter as
+ * the difference between this multixact's starting offset and the next
+ * one's. However, there are some corner cases to worry about:
+ *
+ * 1. This multixact may be the latest one created, in which case there is
+ * no next one to look at. In this case the nextOffset value we just
+ * saved is the correct endpoint.
+ *
+ * 2. (this cannot happen during upgrade)
+ *
+ * 3. Because GetNewMultiXactId increments offset zero to offset one to
+ * handle case #2, there is an ambiguity near the point of offset
+ * wraparound. If we see next multixact's offset is one, is that our
+ * multixact's actual endpoint, or did it end at zero with a subsequent
+ * increment? We handle this using the knowledge that if the zero'th
+ * member slot wasn't filled, it'll contain zero, and zero isn't a valid
+ * transaction ID so it can't be a multixact member. Therefore, if we
+ * read a zero from the members array, just ignore it.
+ */
+ pageno = MultiXactIdToOffsetPage(multi);
+ entryno = MultiXactIdToOffsetEntry(multi);
+
+ buf = SlruReadSwitchPage(state->offset, pageno);
+ offptr = (OldMultiXactOffset *) buf;
+ offptr += entryno;
+ offset = *offptr; /* starting member offset of 'multi' */
+
+ Assert(offset != 0); /* NOTE(review): assertion-only check; confirm whether a zero offset needs a hard error in production builds */
+
+ /*
+ * Use the same increment rule as GetNewMultiXactId(), that is, don't
+ * handle wraparound explicitly until needed.
+ */
+ tmpMXact = multi + 1;
+
+ if (nextMXact == tmpMXact)
+ {
+ /* Corner case 1: there is no next multixact */
+ length = nextOffset - offset; /* unsigned 32-bit difference, stored in an int */
+ }
+ else
+ {
+ OldMultiXactOffset nextMXOffset;
+
+ /* handle wraparound if needed */
+ if (tmpMXact < FirstMultiXactId)
+ tmpMXact = FirstMultiXactId;
+
+ prev_pageno = pageno;
+
+ pageno = MultiXactIdToOffsetPage(tmpMXact);
+ entryno = MultiXactIdToOffsetEntry(tmpMXact);
+
+ if (pageno != prev_pageno) /* otherwise buf still holds the page read above */
+ {
+ buf = SlruReadSwitchPage(state->offset, pageno);
+ }
+
+ offptr = (OldMultiXactOffset *) buf;
+ offptr += entryno;
+ nextMXOffset = *offptr;
+
+ if (nextMXOffset == 0)
+ {
+ /* Corner case 2: next multixact is still being filled in */
+ Assert(false); /* shouldn't happen during upgrade */
+ }
+
+ length = nextMXOffset - offset; /* member count = next multixact's start minus ours */
+ }
+
+ result_xid = InvalidTransactionId;
+ result_isupdate = false;
+ prev_pageno = -1; /* no members page read yet, so the first iteration always reads one */
+ for (int i = 0; i < length; i++, offset++)
+ {
+ TransactionId *xactptr;
+ uint32 *flagsptr;
+ int flagsoff;
+ int bshift;
+ int memberoff;
+ MultiXactStatus status;
+
+ pageno = MXOffsetToMemberPage(offset);
+ memberoff = MXOffsetToMemberOffset(offset);
+
+ if (pageno != prev_pageno)
+ {
+ buf = SlruReadSwitchPage(state->members, pageno);
+ prev_pageno = pageno;
+ }
+
+ xactptr = (TransactionId *) (buf + memberoff);
+
+ if (!TransactionIdIsValid(*xactptr))
+ {
+ /* Corner case 3: we must be looking at unused slot zero */
+ Assert(offset == 0);
+ continue;
+ }
+
+ flagsoff = MXOffsetToFlagsOffset(offset);
+ bshift = MXOffsetToFlagsBitShift(offset);
+ flagsptr = (uint32 *) (buf + flagsoff);
+
+ status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK; /* this member's flag bits from the group's flags word */
+
+ /* Verify that there is a single update Xid among the given members. */
+ if (ISUPDATE_from_mxstatus(status))
+ {
+ if (result_isupdate)
+ pg_fatal("multixact %u has more than one updating member",
+ multi);
+ result_xid = *xactptr; /* an updater replaces any locker remembered earlier */
+ result_isupdate = true;
+ }
+ else if (!TransactionIdIsValid(result_xid)) /* keep only the first locking-only member */
+ result_xid = *xactptr;
+ }
+
+ /* A multixid with zero members should not happen */
+ Assert(TransactionIdIsValid(result_xid));
+
+ *result = result_xid;
+ *isupdate = result_isupdate;
+}
diff --git a/src/bin/pg_upgrade/multixact_old.h b/src/bin/pg_upgrade/multixact_old.h
new file mode 100644
index 0000000000..70800c1cda
--- /dev/null
+++ b/src/bin/pg_upgrade/multixact_old.h
@@ -0,0 +1,12 @@
+/*
+ * multixact_old.h
+ *
+ * Copyright (c) 2010-2024, PostgreSQL Global Development Group
+ * src/bin/pg_upgrade/multixact_old.h
+ */
+
+typedef struct OldMultiXactReader OldMultiXactReader;
+
+extern OldMultiXactReader *StartOldMultiXactRead(void);
+extern void GetOldMultiXactIdSingleMember(OldMultiXactReader *state, MultiXactId multi,
+ TransactionId *result, bool *isupdate);
diff --git a/src/bin/pg_upgrade/multixact_rewrite.c b/src/bin/pg_upgrade/multixact_rewrite.c
new file mode 100644
index 0000000000..8c3f538cc9
--- /dev/null
+++ b/src/bin/pg_upgrade/multixact_rewrite.c
@@ -0,0 +1,238 @@
+/*
+ * multixact_rewrite.c
+ *
+ * Rewrite pre-v18 multixacts to new format with 64-bit MultiXactOffsets
+ *
+ * Copyright (c) 2024, PostgreSQL Global Development Group
+ * src/bin/pg_upgrade/multixact_rewrite.c
+ */
+
+#include "postgres_fe.h"
+
+#include "multixact_old.h"
+#include "pg_upgrade.h"
+#include "slru_io.h"
+
+#include "access/multixact.h"
+#include "access/multixact_internal.h"
+
+typedef struct /* write-side state used while rebuilding pg_multixact in the new cluster */
+{
+ MultiXactId nextMXact; /* multixact ID that GetNewMultiXactId() hands out next */
+ MultiXactOffset nextOffset; /* member offset that GetNewMultiXactId() hands out next */
+
+ SlruSegState *offset; /* writer opened on the new pg_multixact/offsets directory */
+ SlruSegState *members; /* writer opened on the new pg_multixact/members directory */
+} MultiXactWriter;
+
+static MultiXactWriter *StartMultiXactWrite(MultiXactId firstMulti, MultiXactOffset firstOffset);
+static MultiXactId GetNewMultiXactId(MultiXactWriter *state, int nmembers, MultiXactOffset *offset);
+static void RecordNewMultiXact(MultiXactWriter *state,
+ MultiXactOffset offset,
+ MultiXactId multi,
+ int nmembers, MultiXactMember *members);
+static void CloseMultiXactWrite(MultiXactWriter *state);
+
+
+/*
+ * Rewrite the old cluster's pg_multixact/offsets and pg_multixact/members
+ * into the new cluster using the format with 64-bit member offsets.
+ *
+ * Every multixact from the old cluster's oldest multixact up to (but not
+ * including) its next multixact is read and written back under the same ID.
+ * Locking-only members are irrelevant after an upgrade because no transaction
+ * can still be running, so each multixact is written with a single member:
+ * the updating XID if it had one, otherwise its first locking XID.
+ *
+ * On return, *new_nxtmulti holds the next multixact ID and *new_nxtmxoff the
+ * next member offset that match what was written. The multixact ID is the
+ * same as in the old cluster; the offset differs because members were
+ * dropped.
+ *
+ * NOTE(review): the old-cluster reader is not released here; nothing visible
+ * in this file provides a way to close it.
+ */
+void
+convert_multixacts(MultiXactId *new_nxtmulti, MultiXactOffset *new_nxtmxoff)
+{
+	MultiXactId first_multi = old_cluster.controldata.chkpnt_oldstMulti;
+	MultiXactId end_multi = old_cluster.controldata.chkpnt_nxtmulti;
+	MultiXactId cur;
+	OldMultiXactReader *reader;
+	MultiXactWriter *writer;
+
+	/* The loop below never visits IDs below FirstMultiXactId */
+	if (end_multi < FirstMultiXactId)
+		end_multi = FirstMultiXactId;
+
+	reader = StartOldMultiXactRead();
+	writer = StartMultiXactWrite(first_multi, 1);
+
+	cur = first_multi;
+	while (cur != end_multi)
+	{
+		TransactionId xid;
+		bool		isupdate;
+		MultiXactMember member;
+		MultiXactOffset offset;
+		MultiXactId assigned PG_USED_FOR_ASSERTS_ONLY;
+
+		/* Fetch the single member worth keeping from the old files */
+		GetOldMultiXactIdSingleMember(reader, cur, &xid, &isupdate);
+
+		/* Store it in the new format, under the same multixact ID */
+		member.xid = xid;
+		if (isupdate)
+			member.status = MultiXactStatusUpdate;
+		else
+			member.status = MultiXactStatusForKeyShare;
+
+		assigned = GetNewMultiXactId(writer, 1, &offset);
+		Assert(assigned == cur);
+		RecordNewMultiXact(writer, offset, cur, 1, &member);
+
+		/* Step to the next ID, skipping the values below FirstMultiXactId */
+		cur++;
+		if (cur < FirstMultiXactId)
+			cur = FirstMultiXactId;
+	}
+
+	/* Report the counters that match what has just been written */
+	Assert(writer->nextMXact == end_multi);
+	*new_nxtmulti = end_multi;
+	*new_nxtmxoff = writer->nextOffset;
+
+	/* Flush and release the writer */
+	CloseMultiXactWrite(writer);
+}
+
+/* Support routines for writing the new format */
+
+/*
+ * Allocate a writer that creates the new cluster's multixact files.
+ *
+ * firstMulti and firstOffset are the first multixact ID and member offset
+ * that GetNewMultiXactId() will hand out. SLRU writers are opened on the new
+ * cluster's pg_multixact/offsets and pg_multixact/members directories, with
+ * start pages derived from those two values.
+ */
+static MultiXactWriter *
+StartMultiXactWrite(MultiXactId firstMulti, MultiXactOffset firstOffset)
+{
+	MultiXactWriter *state;
+	char	   *dir;
+
+	state = pg_malloc(sizeof(MultiXactWriter));
+	state->nextMXact = firstMulti;
+	state->nextOffset = firstOffset;
+
+	dir = psprintf("%s/pg_multixact/offsets", new_cluster.pgdata);
+	state->offset = OpenSlruWrite(dir, MultiXactIdToOffsetPage(firstMulti));
+	pg_free(dir);
+
+	/*
+	 * Derive the members start page from the caller's firstOffset rather than
+	 * from a hard-coded offset, so it stays in step with nextOffset above.
+	 */
+	dir = psprintf("%s/pg_multixact/members", new_cluster.pgdata);
+	state->members = OpenSlruWrite(dir, MXOffsetToMemberPage(firstOffset));
+	pg_free(dir);
+
+	return state;
+}
+
+/*
+ * Finish writing: close the offsets writer, then the members writer, and
+ * free the writer state itself.
+ */
+static void
+CloseMultiXactWrite(MultiXactWriter *state)
+{
+	SlruSegState *offsets_slru = state->offset;
+	SlruSegState *members_slru = state->members;
+
+	CloseSlruWrite(offsets_slru);
+	CloseSlruWrite(members_slru);
+
+	pg_free(state);
+}
+
+/*
+ * Hand out the next multixact ID together with the member offset at which
+ * its 'nmembers' members start, and advance both counters in 'state'.
+ *
+ * This is a simplified copy of the corresponding server function.
+ *
+ * A counter value below FirstMultiXactId is bumped up to FirstMultiXactId
+ * before it is handed out. The counter is only incremented on the way out,
+ * so any wraparound it causes is dealt with by the next call.
+ */
+static MultiXactId
+GetNewMultiXactId(MultiXactWriter *state, int nmembers, MultiXactOffset *offset)
+{
+	MultiXactId assigned;
+
+	/* Never hand out an ID below FirstMultiXactId */
+	if (state->nextMXact < FirstMultiXactId)
+		state->nextMXact = FirstMultiXactId;
+
+	/* The current counters are what the caller gets */
+	assigned = state->nextMXact;
+	*offset = state->nextOffset;
+
+	/* Advance both counters past what was just handed out */
+	state->nextMXact = assigned + 1;
+	state->nextOffset += nmembers;
+
+	return assigned;
+}
+
+/*
+ * Write a new multixact with members.
+ *
+ * Simplified version of the corresponding server function.
+ *
+ * 'offset' is stored in the offsets SLRU entry for 'multi'. The 'nmembers'
+ * entries of 'members' are then stored at consecutive member offsets
+ * starting at 'offset': each member's XID goes into its XID slot and its
+ * status into its bits of the group's flags word.
+ */
+static void
+RecordNewMultiXact(MultiXactWriter *state, MultiXactOffset offset,
+				   MultiXactId multi,
+				   int nmembers, MultiXactMember *members)
+{
+	int64		offsets_pageno = MultiXactIdToOffsetPage(multi);
+	int			offsets_entryno = MultiXactIdToOffsetEntry(multi);
+	MultiXactOffset *offsets_page;
+	char	   *members_page = NULL;
+	int64		loaded_pageno = -1;
+
+	/* Store the starting member offset in the offsets SLRU */
+	offsets_page = (MultiXactOffset *) SlruWriteSwitchPage(state->offset,
+															offsets_pageno);
+	offsets_page[offsets_entryno] = offset;
+
+	/* Store each member's XID and status in the members SLRU */
+	for (int i = 0; i < nmembers; i++, offset++)
+	{
+		int64		pageno = MXOffsetToMemberPage(offset);
+		int			memberoff = MXOffsetToMemberOffset(offset);
+		int			flagsoff = MXOffsetToFlagsOffset(offset);
+		int			bshift = MXOffsetToFlagsBitShift(offset);
+		TransactionId *xidslot;
+		uint32	   *flagsword;
+		uint32		flags;
+
+		Assert(members[i].status <= MultiXactStatusUpdate);
+
+		/* Ask for the members page on the first member and on page changes */
+		if (pageno != loaded_pageno)
+		{
+			members_page = SlruWriteSwitchPage(state->members, pageno);
+			loaded_pageno = pageno;
+		}
+
+		xidslot = (TransactionId *) (members_page + memberoff);
+		*xidslot = members[i].xid;
+
+		/* Replace only this member's bits within the shared flags word */
+		flagsword = (uint32 *) (members_page + flagsoff);
+		flags = *flagsword;
+		flags &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
+		flags |= (members[i].status << bshift);
+		*flagsword = flags;
+	}
+}
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 36c7f3879d..9bf191b984 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -750,8 +750,27 @@ copy_xact_xlog_xid(void)
if (old_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER &&
new_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER)
{
- copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets");
- copy_subdir_files("pg_multixact/members", "pg_multixact/members");
+ MultiXactId new_nxtmulti = old_cluster.controldata.chkpnt_nxtmulti;
+ MultiXactOffset new_nxtmxoff = old_cluster.controldata.chkpnt_nxtmxoff;
+
+ /*
+ * If the old server is before the MULTIXACTOFFSET_FORMATCHANGE_CAT_VER
+ * it must have 32-bit multixid offsets, thus it should be converted.
+ */
+ if (old_cluster.controldata.cat_ver < MULTIXACTOFFSET_FORMATCHANGE_CAT_VER &&
+ new_cluster.controldata.cat_ver >= MULTIXACTOFFSET_FORMATCHANGE_CAT_VER)
+ {
+ remove_new_subdir("pg_multixact/members", false);
+ remove_new_subdir("pg_multixact/offsets", false);
+ prep_status("Converting pg_multixact/offsets to 64-bit");
+ convert_multixacts(&new_nxtmulti, &new_nxtmxoff);
+ check_ok();
+ }
+ else
+ {
+ copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets");
+ copy_subdir_files("pg_multixact/members", "pg_multixact/members");
+ }
prep_status("Setting next multixact ID and offset for new cluster");
@@ -760,10 +779,10 @@ copy_xact_xlog_xid(void)
* counters here and the oldest multi present on system.
*/
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
- "\"%s/pg_resetwal\" -O %u -m %u,%u \"%s\"",
+ "\"%s/pg_resetwal\" -O %llu -m %u,%u \"%s\"",
new_cluster.bindir,
- old_cluster.controldata.chkpnt_nxtmxoff,
- old_cluster.controldata.chkpnt_nxtmulti,
+ (unsigned long long) new_nxtmxoff,
+ new_nxtmulti,
old_cluster.controldata.chkpnt_oldstMulti,
new_cluster.pgdata);
check_ok();
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 0cdd675e4f..9b3d645b08 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -114,6 +114,13 @@ extern char *output_files[];
*/
#define MULTIXACT_FORMATCHANGE_CAT_VER 201301231
+/*
+ * Switch from 32-bit to 64-bit for multixid offsets.
+ *
+ * XXX: should be changed to the actual CATALOG_VERSION_NO on commit.
+ */
+#define MULTIXACTOFFSET_FORMATCHANGE_CAT_VER 202409041
+
/*
* large object chunk size added to pg_controldata,
* commit 5f93c37805e7485488480916b4585e098d3cc883
@@ -230,7 +237,7 @@ typedef struct
uint32 chkpnt_nxtepoch;
uint32 chkpnt_nxtoid;
uint32 chkpnt_nxtmulti;
- uint32 chkpnt_nxtmxoff;
+ uint64 chkpnt_nxtmxoff;
uint32 chkpnt_oldstMulti;
uint32 chkpnt_oldstxid;
uint32 align;
@@ -515,3 +522,7 @@ typedef struct
FILE *file;
char path[MAXPGPATH];
} UpgradeTaskReport;
+
+/* multixact_rewrite.c */
+
+void convert_multixacts(MultiXactId *new_nxtmulti, MultiXactOffset *new_nxtmxoff);
diff --git a/src/bin/pg_upgrade/slru_io.c b/src/bin/pg_upgrade/slru_io.c
new file mode 100644
index 0000000000..87acf16732
--- /dev/null
+++ b/src/bin/pg_upgrade/slru_io.c
@@ -0,0 +1,211 @@
+/*
+ * slru_io.c
+ *
+ * Routines for reading and writing SLRU files during upgrade.
+ *
+ * Copyright (c) 2024, PostgreSQL Global Development Group
+ * src/bin/pg_upgrade/slru_io.c
+ */
+
+#include "postgres_fe.h"
+
+#include <fcntl.h>
+
+#include "pg_upgrade.h"
+#include "slru_io.h"
+
+#include "common/file_perm.h"
+#include "common/file_utils.h"
+#include "port/pg_iovec.h"
+
+/*
+ * State for reading or writing an SLRU, with a one page buffer.
+ *
+ * A state is created by OpenSlruRead() or OpenSlruWrite(). No segment file
+ * is open (segno is -1, fd is -1, fn is NULL) until the first call to
+ * SlruReadSwitchPage() or SlruWriteSwitchPage().
+ */
+typedef struct SlruSegState
+{
+ bool writing; /* true if created by OpenSlruWrite(), false if by OpenSlruRead() */
+
+ char *dir; /* private copy of the SLRU directory path */
+ char *fn; /* path of the currently open segment file, or NULL */
+ int fd; /* descriptor of the currently open segment file, or -1 */
+ int64 segno; /* number of the currently open segment, or -1 if none */
+ uint64 pageno; /* page currently held in buf; meaningful once segno != -1 */
+
+ PGAlignedBlock buf; /* the single page buffer (BLCKSZ bytes are read/written) */
+} SlruSegState;
+
+static void SlruFlush(SlruSegState *state);
+
+
+/*
+ * Create a reader for the SLRU stored in directory 'dir'.
+ *
+ * The directory path is copied. No file is opened here; that happens on the
+ * first SlruReadSwitchPage() call.
+ */
+SlruSegState *
+OpenSlruRead(char *dir)
+{
+	SlruSegState *reader = pg_malloc(sizeof(SlruSegState));
+
+	reader->dir = pstrdup(dir);
+	reader->writing = false;
+
+	/* nothing is open yet */
+	reader->fn = NULL;
+	reader->fd = -1;
+	reader->segno = -1;
+	reader->pageno = 0;
+
+	return reader;
+}
+
+/*
+ * Release a reader created by OpenSlruRead().
+ *
+ * Closes the segment file if one is open, and frees the directory path, the
+ * segment file name and the state itself. 'state' must not be used
+ * afterwards.
+ */
+void
+CloseSlruRead(SlruSegState *state)
+{
+	Assert(!state->writing);
+
+	/* fd is still -1 if no page was ever read; don't close() that */
+	if (state->fd >= 0)
+		close(state->fd);
+
+	/* fn is NULL while no segment is open; pg_free() accepts NULL */
+	pg_free(state->fn);
+	pg_free(state->dir);
+	pg_free(state);
+}
+
+/*
+ * Create a writer for the SLRU stored in directory 'dir'.
+ *
+ * The directory path is copied. No file is created here; that happens on the
+ * first SlruWriteSwitchPage() call. 'startPageno' is accepted but not used
+ * by this function.
+ */
+SlruSegState *
+OpenSlruWrite(char *dir, int64 startPageno)
+{
+	SlruSegState *writer = pg_malloc(sizeof(SlruSegState));
+
+	writer->dir = pstrdup(dir);
+	writer->writing = true;
+
+	/* nothing is open yet */
+	writer->fn = NULL;
+	writer->fd = -1;
+	writer->segno = -1;
+	writer->pageno = 0;
+
+	return writer;
+}
+
+/*
+ * Finish a writer created by OpenSlruWrite().
+ *
+ * Writes out the buffered page (if any page was ever switched to), closes
+ * the segment file, and frees the directory path, the segment file name and
+ * the state itself. 'state' must not be used afterwards.
+ *
+ * Exits via pg_fatal() if the final write or the close fails.
+ */
+void
+CloseSlruWrite(SlruSegState *state)
+{
+	Assert(state->writing);
+	SlruFlush(state);
+
+	/*
+	 * fd is still -1 if nothing was ever written. For a file we wrote to, a
+	 * failing close() can be the only report of a write error, so check it.
+	 */
+	if (state->fd >= 0 && close(state->fd) != 0)
+		pg_fatal("could not close file \"%s\": %m", state->fn);
+
+	/* fn is NULL while no segment is open; pg_free() accepts NULL */
+	pg_free(state->fn);
+	pg_free(state->dir);
+	pg_free(state);
+}
+
+/*
+ * Write the buffered page to its position in the currently open segment
+ * file. Does nothing if no segment has been opened yet.
+ *
+ * Exits via pg_fatal() if the write fails.
+ */
+static void
+SlruFlush(SlruSegState *state)
+{
+	struct iovec iov;
+	off_t		pos;
+
+	/* segno == -1 means no page has been switched to yet */
+	if (state->segno == -1)
+		return;
+
+	iov.iov_base = &state->buf;
+	iov.iov_len = BLCKSZ;
+
+	/* byte position of the buffered page inside its segment file */
+	pos = (state->pageno % SLRU_PAGES_PER_SEGMENT) * BLCKSZ;
+
+	if (pg_pwritev_with_retry(state->fd, &iov, 1, pos) < 0)
+		pg_fatal("could not write file \"%s\": %m", state->fn);
+}
+
+/*
+ * Open the given page for writing.
+ *
+ * Returns a pointer to the one-page buffer. If 'pageno' is already the
+ * current page the buffer is returned untouched; otherwise the previously
+ * buffered page is written out and the buffer is zeroed for the new page.
+ *
+ * NOTE: This uses O_EXCL when stepping to a new segment, so this assumes that
+ * each segment is written in full before moving on to next one. This
+ * limitation would be easy to lift if needed, but it fits the usage pattern
+ * of current callers.
+ *
+ * Exits via pg_fatal() if a file cannot be created, written or closed.
+ */
+char *
+SlruWriteSwitchPage(SlruSegState *state, uint64 pageno)
+{
+	int64		segno;
+	off_t		offset;
+
+	/* Already positioned on this page? */
+	if (state->segno != -1 && pageno == state->pageno)
+		return state->buf.data;
+
+	segno = pageno / SLRU_PAGES_PER_SEGMENT;
+	offset = (pageno % SLRU_PAGES_PER_SEGMENT) * BLCKSZ;
+
+	/* Write out the page we are leaving, then start from an all-zeros page */
+	SlruFlush(state);
+	memset(state->buf.data, 0, BLCKSZ);
+
+	if (segno != state->segno)
+	{
+		if (state->segno != -1)
+		{
+			/*
+			 * We wrote to this file, and a failing close() can be the only
+			 * report of a write error, so don't ignore its result.
+			 */
+			if (close(state->fd) != 0)
+				pg_fatal("could not close file \"%s\": %m", state->fn);
+			state->fd = -1;
+			pg_free(state->fn);
+			state->fn = NULL;
+		}
+
+		/* Create the segment */
+		state->fn = psprintf("%s/%04X", state->dir, (unsigned int) segno);
+		if ((state->fd = open(state->fn, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+							  pg_file_create_mode)) < 0)
+		{
+			pg_fatal("could not create file \"%s\": %m", state->fn);
+		}
+		state->segno = segno;
+
+		/* Zero-fill the part of the new segment that precedes this page */
+		if (offset > 0)
+		{
+			if (pg_pwrite_zeros(state->fd, offset, 0) < 0)
+				pg_fatal("could not write file \"%s\": %m", state->fn);
+		}
+	}
+
+	state->pageno = pageno;
+	return state->buf.data;
+}
+
+/*
+ * Open given page for reading.
+ *
+ * Reading can be done in random order.
+ *
+ * Returns a pointer to the one-page buffer holding page 'pageno'. If that
+ * page is already in the buffer it is returned without re-reading.
+ *
+ * Exits via pg_fatal() if the segment file cannot be opened, or if the page
+ * cannot be read in full.
+ */
+char *
+SlruReadSwitchPage(SlruSegState *state, uint64 pageno)
+{
+	int64		segno;
+	off_t		offset;
+	ssize_t		nread;
+	struct iovec iovec = {
+		.iov_base = &state->buf,
+		.iov_len = BLCKSZ,
+	};
+
+	/* Already positioned on this page? */
+	if (state->segno != -1 && pageno == state->pageno)
+		return state->buf.data;
+
+	segno = pageno / SLRU_PAGES_PER_SEGMENT;
+
+	if (segno != state->segno)
+	{
+		if (state->segno != -1)
+		{
+			close(state->fd);
+			state->fd = -1;
+			pg_free(state->fn);
+			state->fn = NULL;
+		}
+
+		/* Open new segment */
+		state->fn = psprintf("%s/%04X", state->dir, (unsigned int) segno);
+		if ((state->fd = open(state->fn, O_RDONLY | PG_BINARY, 0)) < 0)
+		{
+			pg_fatal("could not open file \"%s\": %m", state->fn);
+		}
+		state->segno = segno;
+	}
+
+	offset = (pageno % SLRU_PAGES_PER_SEGMENT) * BLCKSZ;
+
+	nread = pg_preadv(state->fd, &iovec, 1, offset);
+	if (nread < 0)
+		pg_fatal("could not read file \"%s\": %m", state->fn);
+
+	/*
+	 * A short read would leave part of the previously buffered page in
+	 * place, and callers would interpret those stale bytes as belonging to
+	 * this page. Treat it as an error instead.
+	 */
+	if (nread != BLCKSZ)
+		pg_fatal("could not read file \"%s\": read %d of %d",
+				 state->fn, (int) nread, (int) BLCKSZ);
+
+	state->pageno = pageno;
+
+	return state->buf.data;
+}
diff --git a/src/bin/pg_upgrade/slru_io.h b/src/bin/pg_upgrade/slru_io.h
new file mode 100644
index 0000000000..e1a9c06313
--- /dev/null
+++ b/src/bin/pg_upgrade/slru_io.h
@@ -0,0 +1,23 @@
+/*
+ * slru_io.h
+ *
+ * Copyright (c) 2010-2024, PostgreSQL Global Development Group
+ * src/bin/pg_upgrade/slru_io.h
+ */
+
+/* XXX: copied from slru.h */
+#define SLRU_PAGES_PER_SEGMENT 32
+
+/*
+ * Some kind of iterator associated with a particular SLRU segment. The idea is
+ * to specify the segment and page number and then move through the pages.
+ */
+typedef struct SlruSegState SlruSegState;
+
+extern SlruSegState *OpenSlruRead(char *dir);
+extern void CloseSlruRead(SlruSegState *state);
+extern char *SlruReadSwitchPage(SlruSegState *state, uint64 pageno);
+
+extern SlruSegState *OpenSlruWrite(char *dir, int64 startPageno);
+extern void CloseSlruWrite(SlruSegState *state);
+extern char *SlruWriteSwitchPage(SlruSegState *state, uint64 pageno);
--
2.43.0
[text/plain] v13-0004-TEST-initdb-option-to-initialize-cluster-with-no.patch.txt (25.4K, 5-v13-0004-TEST-initdb-option-to-initialize-cluster-with-no.patch.txt)
download | inline diff:
From be906a9c2161e6972a396a9d283bb76ca023a808 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <[email protected]>
Date: Wed, 4 May 2022 15:53:36 +0300
Subject: [PATCH v13 4/7] TEST: initdb option to initialize cluster with
non-standard xid/mxid/mxoff
To date, testing database cluster wraparound was not easy, as initdb has always
initialized the cluster with the default xid/mxid/mxoff. The option to specify
any valid xid/mxid/mxoff at cluster startup will make these things easier.
Author: Maxim Orlov <[email protected]>
Author: Pavel Borisov <[email protected]>
Author: Svetlana Derevyanko <[email protected]>
Discussion: https://www.postgresql.org/message-id/flat/CACG%3Dezaa4vqYjJ16yoxgrpa-%3DgXnf0Vv3Ey9bjGrRRFN2YyWFQ%40mail.gmail.com
---
src/backend/access/transam/clog.c | 21 +++++
src/backend/access/transam/multixact.c | 53 ++++++++++++
src/backend/access/transam/subtrans.c | 8 +-
src/backend/access/transam/xlog.c | 15 ++--
src/backend/bootstrap/bootstrap.c | 50 +++++++++++-
src/backend/main/main.c | 6 ++
src/backend/postmaster/postmaster.c | 14 +++-
src/backend/tcop/postgres.c | 53 +++++++++++-
src/bin/initdb/initdb.c | 107 ++++++++++++++++++++++++-
src/bin/initdb/t/001_initdb.pl | 60 ++++++++++++++
src/include/access/xlog.h | 3 +
src/include/c.h | 4 +
src/include/catalog/pg_class.h | 2 +-
13 files changed, 382 insertions(+), 14 deletions(-)
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 0d556c00b8..89516e9f52 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -834,6 +834,7 @@ BootStrapCLOG(void)
{
int slotno;
LWLock *lock = SimpleLruGetBankLock(XactCtl, 0);
+ int64 pageno;
LWLockAcquire(lock, LW_EXCLUSIVE);
@@ -844,6 +845,26 @@ BootStrapCLOG(void)
SimpleLruWritePage(XactCtl, slotno);
Assert(!XactCtl->shared->page_dirty[slotno]);
+ pageno = TransactionIdToPage(XidFromFullTransactionId(TransamVariables->nextXid));
+ if (pageno != 0)
+ {
+ LWLock *nextlock = SimpleLruGetBankLock(XactCtl, pageno);
+
+ if (nextlock != lock)
+ {
+ LWLockRelease(lock);
+ LWLockAcquire(nextlock, LW_EXCLUSIVE);
+ lock = nextlock;
+ }
+
+ /* Create and zero the commit log page that holds nextXid */
+ slotno = ZeroCLOGPage(pageno, false);
+
+ /* Make sure it's written out */
+ SimpleLruWritePage(XactCtl, slotno);
+ Assert(!XactCtl->shared->page_dirty[slotno]);
+ }
+
LWLockRelease(lock);
}
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index d63ae17330..70c9d2f6ee 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1815,6 +1815,7 @@ BootStrapMultiXact(void)
{
int slotno;
LWLock *lock;
+ int64 pageno;
lock = SimpleLruGetBankLock(MultiXactOffsetCtl, 0);
LWLockAcquire(lock, LW_EXCLUSIVE);
@@ -1826,6 +1827,26 @@ BootStrapMultiXact(void)
SimpleLruWritePage(MultiXactOffsetCtl, slotno);
Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
+ pageno = MultiXactIdToOffsetPage(MultiXactState->nextMXact);
+ if (pageno != 0)
+ {
+ LWLock *nextlock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
+
+ if (nextlock != lock)
+ {
+ LWLockRelease(lock);
+ LWLockAcquire(nextlock, LW_EXCLUSIVE);
+ lock = nextlock;
+ }
+
+ /* Create and zero the offsets log page that holds nextMXact */
+ slotno = ZeroMultiXactOffsetPage(pageno, false);
+
+ /* Make sure it's written out */
+ SimpleLruWritePage(MultiXactOffsetCtl, slotno);
+ Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
+ }
+
LWLockRelease(lock);
lock = SimpleLruGetBankLock(MultiXactMemberCtl, 0);
@@ -1838,7 +1859,39 @@ BootStrapMultiXact(void)
SimpleLruWritePage(MultiXactMemberCtl, slotno);
Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
+ pageno = MXOffsetToMemberPage(MultiXactState->nextOffset);
+ if (pageno != 0)
+ {
+ LWLock *nextlock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
+
+ if (nextlock != lock)
+ {
+ LWLockRelease(lock);
+ LWLockAcquire(nextlock, LW_EXCLUSIVE);
+ lock = nextlock;
+ }
+
+ /* Create and zero the members log page that holds nextOffset */
+ slotno = ZeroMultiXactMemberPage(pageno, false);
+
+ /* Make sure it's written out */
+ SimpleLruWritePage(MultiXactMemberCtl, slotno);
+ Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
+ }
+
LWLockRelease(lock);
+
+ /*
+ * If we're not starting from offset zero, initialize dummy multixacts to
+ * avoid an overly long loop in PerformMembersTruncation().
+ */
+ if (MultiXactState->nextOffset > 0 && MultiXactState->nextMXact > 0)
+ {
+ RecordNewMultiXact(FirstMultiXactId,
+ MultiXactState->nextOffset, 0, NULL);
+ RecordNewMultiXact(MultiXactState->nextMXact,
+ MultiXactState->nextOffset, 0, NULL);
+ }
}
/*
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index 15153618fa..218675fa60 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -270,12 +270,16 @@ void
BootStrapSUBTRANS(void)
{
int slotno;
- LWLock *lock = SimpleLruGetBankLock(SubTransCtl, 0);
+ LWLock *lock;
+ int64 pageno;
+
+ pageno = TransactionIdToPage(XidFromFullTransactionId(TransamVariables->nextXid));
+ lock = SimpleLruGetBankLock(SubTransCtl, pageno);
LWLockAcquire(lock, LW_EXCLUSIVE);
/* Create and zero the first page of the subtrans log */
- slotno = ZeroSUBTRANSPage(0);
+ slotno = ZeroSUBTRANSPage(pageno);
/* Make sure it's written out */
SimpleLruWritePage(SubTransCtl, slotno);
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a813a090fa..9f78a3e34a 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -136,6 +136,10 @@ int max_slot_wal_keep_size_mb = -1;
int wal_decode_buffer_size = 512 * 1024;
bool track_wal_io_timing = false;
+TransactionId start_xid = FirstNormalTransactionId;
+MultiXactId start_mxid = FirstMultiXactId;
+MultiXactOffset start_mxoff = 0;
+
#ifdef WAL_DEBUG
bool XLOG_DEBUG = false;
#endif
@@ -5080,13 +5084,14 @@ BootStrapXLOG(uint32 data_checksum_version)
checkPoint.fullPageWrites = fullPageWrites;
checkPoint.wal_level = wal_level;
checkPoint.nextXid =
- FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
+ FullTransactionIdFromEpochAndXid(0, Max(FirstNormalTransactionId,
+ start_xid));
checkPoint.nextOid = FirstGenbkiObjectId;
- checkPoint.nextMulti = FirstMultiXactId;
- checkPoint.nextMultiOffset = 1;
- checkPoint.oldestXid = FirstNormalTransactionId;
+ checkPoint.nextMulti = Max(FirstMultiXactId, start_mxid);
+ checkPoint.nextMultiOffset = Max(1, start_mxoff);
+ checkPoint.oldestXid = XidFromFullTransactionId(checkPoint.nextXid);
checkPoint.oldestXidDB = Template1DbOid;
- checkPoint.oldestMulti = FirstMultiXactId;
+ checkPoint.oldestMulti = checkPoint.nextMulti;
checkPoint.oldestMultiDB = Template1DbOid;
checkPoint.oldestCommitTsXid = InvalidTransactionId;
checkPoint.newestCommitTsXid = InvalidTransactionId;
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index 359f58a8f9..b697138b7e 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -218,7 +218,7 @@ BootstrapModeMain(int argc, char *argv[], bool check_only)
argv++;
argc--;
- while ((flag = getopt(argc, argv, "B:c:d:D:Fkr:X:-:")) != -1)
+ while ((flag = getopt(argc, argv, "B:c:d:D:Fkm:o:r:X:x:-:")) != -1)
{
switch (flag)
{
@@ -286,12 +286,60 @@ BootstrapModeMain(int argc, char *argv[], bool check_only)
case 'k':
bootstrap_data_checksum_version = PG_DATA_CHECKSUM_VERSION;
break;
+ case 'm':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_mxid = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartMultiXactIdIsValid(start_mxid))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid initial database cluster multixact id")));
+ }
+ }
+ break;
+ case 'o':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_mxoff = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartMultiXactOffsetIsValid(start_mxoff))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid initial database cluster multixact offset")));
+ }
+ }
+ break;
case 'r':
strlcpy(OutputFileName, optarg, MAXPGPATH);
break;
case 'X':
SetConfigOption("wal_segment_size", optarg, PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT);
break;
+ case 'x':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_xid = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartTransactionIdIsValid(start_xid))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid initial database cluster xid value")));
+ }
+ }
+ break;
default:
write_stderr("Try \"%s --help\" for more information.\n",
progname);
diff --git a/src/backend/main/main.c b/src/backend/main/main.c
index e8effe5024..ff252dffbd 100644
--- a/src/backend/main/main.c
+++ b/src/backend/main/main.c
@@ -426,12 +426,18 @@ help(const char *progname)
printf(_(" -E echo statement before execution\n"));
printf(_(" -j do not use newline as interactive query delimiter\n"));
printf(_(" -r FILENAME send stdout and stderr to given file\n"));
+ printf(_(" -m START_MXID set initial database cluster multixact id\n"));
+ printf(_(" -o START_MXOFF set initial database cluster multixact offset\n"));
+ printf(_(" -x START_XID set initial database cluster xid\n"));
printf(_("\nOptions for bootstrapping mode:\n"));
printf(_(" --boot selects bootstrapping mode (must be first argument)\n"));
printf(_(" --check selects check mode (must be first argument)\n"));
printf(_(" DBNAME database name (mandatory argument in bootstrapping mode)\n"));
printf(_(" -r FILENAME send stdout and stderr to given file\n"));
+ printf(_(" -m START_MXID set initial database cluster multixact id\n"));
+ printf(_(" -o START_MXOFF set initial database cluster multixact offset\n"));
+ printf(_(" -x START_XID set initial database cluster xid\n"));
printf(_("\nPlease read the documentation for the complete list of run-time\n"
"configuration settings and how to set them on the command line or in\n"
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index bb22b13ade..028a734517 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -585,7 +585,7 @@ PostmasterMain(int argc, char *argv[])
* tcop/postgres.c (the option sets should not conflict) and with the
* common help() function in main/main.c.
*/
- while ((opt = getopt(argc, argv, "B:bC:c:D:d:EeFf:h:ijk:lN:OPp:r:S:sTt:W:-:")) != -1)
+ while ((opt = getopt(argc, argv, "B:bC:c:D:d:EeFf:h:ijk:lm:N:Oo:Pp:r:S:sTt:W:x:-:")) != -1)
{
switch (opt)
{
@@ -695,10 +695,18 @@ PostmasterMain(int argc, char *argv[])
SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
break;
+ case 'm':
+ /* only used by single-user backend */
+ break;
+
case 'O':
SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
break;
+ case 'o':
+ /* only used by single-user backend */
+ break;
+
case 'P':
SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
break;
@@ -749,6 +757,10 @@ PostmasterMain(int argc, char *argv[])
SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
break;
+ case 'x':
+ /* only used by single-user backend */
+ break;
+
default:
write_stderr("Try \"%s --help\" for more information.\n",
progname);
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 5655348a2e..9c170f4906 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3788,7 +3788,7 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx,
* postmaster/postmaster.c (the option sets should not conflict) and with
* the common help() function in main/main.c.
*/
- while ((flag = getopt(argc, argv, "B:bC:c:D:d:EeFf:h:ijk:lN:nOPp:r:S:sTt:v:W:-:")) != -1)
+ while ((flag = getopt(argc, argv, "B:bC:c:D:d:EeFf:h:ijk:lm:N:nOo:Pp:r:S:sTt:v:W:x:-:")) != -1)
{
switch (flag)
{
@@ -3893,6 +3893,23 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx,
SetConfigOption("ssl", "true", ctx, gucsource);
break;
+ case 'm':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_mxid = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartMultiXactIdIsValid(start_mxid))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid initial database cluster multixact id")));
+ }
+ }
+ break;
+
case 'N':
SetConfigOption("max_connections", optarg, ctx, gucsource);
break;
@@ -3905,6 +3922,23 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx,
SetConfigOption("allow_system_table_mods", "true", ctx, gucsource);
break;
+ case 'o':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_mxoff = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartMultiXactOffsetIsValid(start_mxoff))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid initial database cluster multixact offset")));
+ }
+ }
+ break;
+
case 'P':
SetConfigOption("ignore_system_indexes", "true", ctx, gucsource);
break;
@@ -3959,6 +3993,23 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx,
SetConfigOption("post_auth_delay", optarg, ctx, gucsource);
break;
+ case 'x':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_xid = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartTransactionIdIsValid(start_xid))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid initial database cluster xid")));
+ }
+ }
+ break;
+
default:
errs++;
break;
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 759672a9b9..125bfb6736 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -168,6 +168,9 @@ static bool data_checksums = true;
static char *xlog_dir = NULL;
static int wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024);
static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
+static TransactionId start_xid = 0;
+static MultiXactId start_mxid = 0;
+static MultiXactOffset start_mxoff = 0;
/* internal vars */
@@ -1596,6 +1599,11 @@ bootstrap_template1(void)
bki_lines = replace_token(bki_lines, "POSTGRES",
escape_quotes_bki(username));
+ /* relfrozenxid must not be less than FirstNormalTransactionId */
+ sprintf(buf, "%llu", (unsigned long long) Max(start_xid, 3));
+ bki_lines = replace_token(bki_lines, "RECENTXMIN",
+ buf);
+
bki_lines = replace_token(bki_lines, "ENCODING",
encodingid_to_string(encodingid));
@@ -1621,6 +1629,9 @@ bootstrap_template1(void)
printfPQExpBuffer(&cmd, "\"%s\" --boot %s %s", backend_exec, boot_options, extra_options);
appendPQExpBuffer(&cmd, " -X %d", wal_segment_size_mb * (1024 * 1024));
+ appendPQExpBuffer(&cmd, " -m %llu", (unsigned long long) start_mxid);
+ appendPQExpBuffer(&cmd, " -o %llu", (unsigned long long) start_mxoff);
+ appendPQExpBuffer(&cmd, " -x %llu", (unsigned long long) start_xid);
if (data_checksums)
appendPQExpBuffer(&cmd, " -k");
if (debug)
@@ -2562,12 +2573,20 @@ usage(const char *progname)
printf(_(" -d, --debug generate lots of debugging output\n"));
printf(_(" --discard-caches set debug_discard_caches=1\n"));
printf(_(" -L DIRECTORY where to find the input files\n"));
+ printf(_(" -m, --multixact-id=START_MXID\n"
+ " set initial database cluster multixact id\n"
+ " max value is 2^62-1\n"));
printf(_(" -n, --no-clean do not clean up after errors\n"));
printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n"));
printf(_(" --no-instructions do not print instructions for next steps\n"));
+ printf(_(" -o, --multixact-offset=START_MXOFF\n"
+ " set initial database cluster multixact offset\n"
+ " max value is 2^62-1\n"));
printf(_(" -s, --show show internal settings, then exit\n"));
printf(_(" --sync-method=METHOD set method for syncing files to disk\n"));
printf(_(" -S, --sync-only only sync database files to disk, then exit\n"));
+ printf(_(" -x, --xid=START_XID set initial database cluster xid\n"
+ " max value is 2^62-1\n"));
printf(_("\nOther options:\n"));
printf(_(" -V, --version output version information, then exit\n"));
printf(_(" -?, --help show this help, then exit\n"));
@@ -3102,6 +3121,18 @@ initialize_data_directory(void)
/* Now create all the text config files */
setup_config();
+ if (start_mxid != 0)
+ printf(_("selecting initial multixact id ... %llu\n"),
+ (unsigned long long) start_mxid);
+
+ if (start_mxoff != 0)
+ printf(_("selecting initial multixact offset ... %llu\n"),
+ (unsigned long long) start_mxoff);
+
+ if (start_xid != 0)
+ printf(_("selecting initial xid ... %llu\n"),
+ (unsigned long long) start_xid);
+
/* Bootstrap template1 */
bootstrap_template1();
@@ -3118,8 +3149,12 @@ initialize_data_directory(void)
fflush(stdout);
initPQExpBuffer(&cmd);
- printfPQExpBuffer(&cmd, "\"%s\" %s %s template1 >%s",
- backend_exec, backend_options, extra_options, DEVNULL);
+ printfPQExpBuffer(&cmd, "\"%s\" %s %s",
+ backend_exec, backend_options, extra_options);
+ appendPQExpBuffer(&cmd, " -m %llu", (unsigned long long) start_mxid);
+ appendPQExpBuffer(&cmd, " -o %llu", (unsigned long long) start_mxoff);
+ appendPQExpBuffer(&cmd, " -x %llu", (unsigned long long) start_xid);
+ appendPQExpBuffer(&cmd, " template1 >%s", DEVNULL);
PG_CMD_OPEN(cmd.data);
@@ -3206,6 +3241,9 @@ main(int argc, char *argv[])
{"icu-rules", required_argument, NULL, 18},
{"sync-method", required_argument, NULL, 19},
{"no-data-checksums", no_argument, NULL, 20},
+ {"xid", required_argument, NULL, 'x'},
+ {"multixact-id", required_argument, NULL, 'm'},
+ {"multixact-offset", required_argument, NULL, 'o'},
{NULL, 0, NULL, 0}
};
@@ -3247,7 +3285,7 @@ main(int argc, char *argv[])
/* process command-line options */
- while ((c = getopt_long(argc, argv, "A:c:dD:E:gkL:nNsST:U:WX:",
+ while ((c = getopt_long(argc, argv, "A:c:dD:E:gkL:m:nNo:sST:U:Wx:X:",
long_options, &option_index)) != -1)
{
switch (c)
@@ -3305,6 +3343,30 @@ main(int argc, char *argv[])
debug = true;
printf(_("Running in debug mode.\n"));
break;
+ case 'm':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_mxid = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartMultiXactIdIsValid(start_mxid))
+ {
+ pg_log_error("invalid initial database cluster multixact id");
+ exit(1);
+ }
+ else if (start_mxid < 1) /* FirstMultiXactId */
+ {
+ /*
+ * Report an error instead of silently setting mxid to
+ * FirstMultiXactId, although doing so would be harmless.
+ */
+ pg_log_error("multixact id should be greater than 0");
+ exit(1);
+ }
+ }
+ break;
case 'n':
noclean = true;
printf(_("Running in no-clean mode. Mistakes will not be cleaned up.\n"));
@@ -3312,6 +3374,21 @@ main(int argc, char *argv[])
case 'N':
do_sync = false;
break;
+ case 'o':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_mxoff = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartMultiXactOffsetIsValid(start_mxoff))
+ {
+ pg_log_error("invalid initial database cluster multixact offset");
+ exit(1);
+ }
+ }
+ break;
case 'S':
sync_only = true;
break;
@@ -3400,6 +3477,30 @@ main(int argc, char *argv[])
case 20:
data_checksums = false;
break;
+ case 'x':
+ {
+ char *endptr;
+
+ errno = 0;
+ start_xid = strtou64(optarg, &endptr, 0);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+ !StartTransactionIdIsValid(start_xid))
+ {
+ pg_log_error("invalid value for initial database cluster xid");
+ exit(1);
+ }
+ else if (start_xid < 3) /* FirstNormalTransactionId */
+ {
+ /*
+ * Report an error instead of silently setting xid to
+ * FirstNormalTransactionId, although doing so would be harmless.
+ */
+ pg_log_error("xid should be greater than 2");
+ exit(1);
+ }
+ }
+ break;
default:
/* getopt_long already emitted a complaint */
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index 01cc4a1602..8b017eb907 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -329,4 +329,64 @@ command_fails(
[ 'pg_checksums', '--pgdata' => $datadir_nochecksums ],
"pg_checksums fails with data checksum disabled");
+# Set non-standard initial mxid/mxoff/xid.
+command_fails_like(
+ [ 'initdb', '-m', 'seven', $datadir ],
+ qr/initdb: error: invalid initial database cluster multixact id/,
+ 'fails for invalid initial database cluster multixact id');
+command_fails_like(
+ [ 'initdb', '-o', 'seven', $datadir ],
+ qr/initdb: error: invalid initial database cluster multixact offset/,
+ 'fails for invalid initial database cluster multixact offset');
+command_fails_like(
+ [ 'initdb', '-x', 'seven', $datadir ],
+ qr/initdb: error: invalid value for initial database cluster xid/,
+ 'fails for invalid initial database cluster xid');
+
+command_checks_all(
+ [ 'initdb', '-m', '65535', "$tempdir/data-m65535" ],
+ 0,
+ [qr/selecting initial multixact id ... 65535/],
+ [],
+ 'selecting initial multixact id');
+command_checks_all(
+ [ 'initdb', '-o', '65535', "$tempdir/data-o65535" ],
+ 0,
+ [qr/selecting initial multixact offset ... 65535/],
+ [],
+ 'selecting initial multixact offset');
+command_checks_all(
+ [ 'initdb', '-x', '65535', "$tempdir/data-x65535" ],
+ 0,
+ [qr/selecting initial xid ... 65535/],
+ [],
+ 'selecting initial xid');
+
+# Setup new cluster with given mxid/mxoff/xid.
+my $node;
+my $result;
+
+$node = PostgreSQL::Test::Cluster->new('test-mxid');
+$node->init(extra => ['-m', '16777215']); # 0xFFFFFF
+$node->start;
+$result = $node->safe_psql('postgres', "SELECT next_multixact_id FROM pg_control_checkpoint();");
+ok($result >= 16777215, 'setup cluster with given mxid');
+$node->stop;
+
+$node = PostgreSQL::Test::Cluster->new('test-mxoff');
+$node->init(extra => ['-o', '16777215']); # 0xFFFFFF
+$node->start;
+$result = $node->safe_psql('postgres', "SELECT next_multi_offset FROM pg_control_checkpoint();");
+ok($result >= 16777215, 'setup cluster with given mxoff');
+$node->stop;
+
+$node = PostgreSQL::Test::Cluster->new('test-xid');
+$node->init(extra => ['-x', '16777215']); # 0xFFFFFF
+$node->start;
+$result = $node->safe_psql('postgres', "SELECT txid_current();");
+ok($result >= 16777215, 'setup cluster with given xid - check 1');
+$result = $node->safe_psql('postgres', "SELECT oldest_xid FROM pg_control_checkpoint();");
+ok($result >= 16777215, 'setup cluster with given xid - check 2');
+$node->stop;
+
done_testing();
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 4411c1468a..8eb34846da 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -94,6 +94,9 @@ typedef enum RecoveryState
} RecoveryState;
extern PGDLLIMPORT int wal_level;
+extern PGDLLIMPORT TransactionId start_xid;
+extern PGDLLIMPORT MultiXactId start_mxid;
+extern PGDLLIMPORT MultiXactOffset start_mxoff;
/* Is WAL archiving enabled (always or only while server is running normally)? */
#define XLogArchivingActive() \
diff --git a/src/include/c.h b/src/include/c.h
index 318194f78d..4f2b5432e5 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -622,6 +622,10 @@ typedef uint64 MultiXactOffset;
typedef uint32 CommandId;
+#define StartTransactionIdIsValid(xid) ((xid) <= 0xFFFFFFFF)
+#define StartMultiXactIdIsValid(mxid) ((mxid) <= 0xFFFFFFFF)
+#define StartMultiXactOffsetIsValid(offset) ((offset) <= 0xFFFFFFFF)
+
#define FirstCommandId ((CommandId) 0)
#define InvalidCommandId (~(CommandId)0)
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index f0d612ca48..5c63290a72 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -123,7 +123,7 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat
Oid relrewrite BKI_DEFAULT(0) BKI_LOOKUP_OPT(pg_class);
/* all Xids < this are frozen in this rel */
- TransactionId relfrozenxid BKI_DEFAULT(3); /* FirstNormalTransactionId */
+ TransactionId relfrozenxid BKI_DEFAULT(RECENTXMIN); /* FirstNormalTransactionId */
/* all multixacts in this rel are >= this; it is really a MultiXactId */
TransactionId relminmxid BKI_DEFAULT(1); /* FirstMultiXactId */
--
2.43.0
[application/octet-stream] v13-0001-Use-64-bit-format-output-for-multixact-offsets.patch (9.0K, 6-v13-0001-Use-64-bit-format-output-for-multixact-offsets.patch)
download | inline diff:
From ba345510f8e52c4504c238b85512bfe864a8a6c3 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <[email protected]>
Date: Wed, 7 Aug 2024 16:35:22 +0300
Subject: [PATCH v13 1/7] Use 64-bit format output for multixact offsets
Author: Maxim Orlov <[email protected]>
---
src/backend/access/rmgrdesc/mxactdesc.c | 9 ++++----
src/backend/access/rmgrdesc/xlogdesc.c | 4 ++--
src/backend/access/transam/multixact.c | 26 +++++++++++++----------
src/backend/access/transam/xlogrecovery.c | 5 +++--
src/bin/pg_controldata/pg_controldata.c | 4 ++--
src/bin/pg_resetwal/pg_resetwal.c | 8 +++----
6 files changed, 31 insertions(+), 25 deletions(-)
diff --git a/src/backend/access/rmgrdesc/mxactdesc.c b/src/backend/access/rmgrdesc/mxactdesc.c
index 8bd3d5b63c..b792e9d939 100644
--- a/src/backend/access/rmgrdesc/mxactdesc.c
+++ b/src/backend/access/rmgrdesc/mxactdesc.c
@@ -65,8 +65,8 @@ multixact_desc(StringInfo buf, XLogReaderState *record)
xl_multixact_create *xlrec = (xl_multixact_create *) rec;
int i;
- appendStringInfo(buf, "%u offset %u nmembers %d: ", xlrec->mid,
- xlrec->moff, xlrec->nmembers);
+ appendStringInfo(buf, "%u offset %llu nmembers %d: ", xlrec->mid,
+ (unsigned long long) xlrec->moff, xlrec->nmembers);
for (i = 0; i < xlrec->nmembers; i++)
out_member(buf, &xlrec->members[i]);
}
@@ -74,9 +74,10 @@ multixact_desc(StringInfo buf, XLogReaderState *record)
{
xl_multixact_truncate *xlrec = (xl_multixact_truncate *) rec;
- appendStringInfo(buf, "offsets [%u, %u), members [%u, %u)",
+ appendStringInfo(buf, "offsets [%u, %u), members [%llu, %llu)",
xlrec->startTruncOff, xlrec->endTruncOff,
- xlrec->startTruncMemb, xlrec->endTruncMemb);
+ (unsigned long long) xlrec->startTruncMemb,
+ (unsigned long long) xlrec->endTruncMemb);
}
}
diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c
index 58040f2865..e52a5625a8 100644
--- a/src/backend/access/rmgrdesc/xlogdesc.c
+++ b/src/backend/access/rmgrdesc/xlogdesc.c
@@ -66,7 +66,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
CheckPoint *checkpoint = (CheckPoint *) rec;
appendStringInfo(buf, "redo %X/%X; "
- "tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %u; "
+ "tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %llu; "
"oldest xid %u in DB %u; oldest multi %u in DB %u; "
"oldest/newest commit timestamp xid: %u/%u; "
"oldest running xid %u; %s",
@@ -79,7 +79,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
XidFromFullTransactionId(checkpoint->nextXid),
checkpoint->nextOid,
checkpoint->nextMulti,
- checkpoint->nextMultiOffset,
+ (unsigned long long) checkpoint->nextMultiOffset,
checkpoint->oldestXid,
checkpoint->oldestXidDB,
checkpoint->oldestMulti,
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 27ccdf9500..623fc8bdac 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1264,7 +1264,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
LWLockRelease(MultiXactGenLock);
- debug_elog4(DEBUG2, "GetNew: returning %u offset %u", result, *offset);
+ debug_elog4(DEBUG2, "GetNew: returning %u offset %llu", result,
+ (unsigned long long) *offset);
return result;
}
@@ -2293,8 +2294,9 @@ MultiXactGetCheckptMulti(bool is_shutdown,
LWLockRelease(MultiXactGenLock);
debug_elog6(DEBUG2,
- "MultiXact: checkpoint is nextMulti %u, nextOffset %u, oldestMulti %u in DB %u",
- *nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
+ "MultiXact: checkpoint is nextMulti %u, nextOffset %llu, oldestMulti %u in DB %u",
+ *nextMulti, (unsigned long long) *nextMultiOffset, *oldestMulti,
+ *oldestMultiDB);
}
/*
@@ -2328,8 +2330,8 @@ void
MultiXactSetNextMXact(MultiXactId nextMulti,
MultiXactOffset nextMultiOffset)
{
- debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %u",
- nextMulti, nextMultiOffset);
+ debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %llu",
+ nextMulti, (unsigned long long) nextMultiOffset);
LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
MultiXactState->nextMXact = nextMulti;
MultiXactState->nextOffset = nextMultiOffset;
@@ -2519,8 +2521,8 @@ MultiXactAdvanceNextMXact(MultiXactId minMulti,
}
if (MultiXactOffsetPrecedes(MultiXactState->nextOffset, minMultiOffset))
{
- debug_elog3(DEBUG2, "MultiXact: setting next offset to %u",
- minMultiOffset);
+ debug_elog3(DEBUG2, "MultiXact: setting next offset to %llu",
+ (unsigned long long) minMultiOffset);
MultiXactState->nextOffset = minMultiOffset;
}
LWLockRelease(MultiXactGenLock);
@@ -3211,11 +3213,12 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
elog(DEBUG1, "performing multixact truncation: "
"offsets [%u, %u), offsets segments [%llx, %llx), "
- "members [%u, %u), members segments [%llx, %llx)",
+ "members [%llu, %llu), members segments [%llx, %llx)",
oldestMulti, newOldestMulti,
(unsigned long long) MultiXactIdToOffsetSegment(oldestMulti),
(unsigned long long) MultiXactIdToOffsetSegment(newOldestMulti),
- oldestOffset, newOldestOffset,
+ (unsigned long long) oldestOffset,
+ (unsigned long long) newOldestOffset,
(unsigned long long) MXOffsetToMemberSegment(oldestOffset),
(unsigned long long) MXOffsetToMemberSegment(newOldestOffset));
@@ -3471,11 +3474,12 @@ multixact_redo(XLogReaderState *record)
elog(DEBUG1, "replaying multixact truncation: "
"offsets [%u, %u), offsets segments [%llx, %llx), "
- "members [%u, %u), members segments [%llx, %llx)",
+ "members [%llu, %llu), members segments [%llx, %llx)",
xlrec.startTruncOff, xlrec.endTruncOff,
(unsigned long long) MultiXactIdToOffsetSegment(xlrec.startTruncOff),
(unsigned long long) MultiXactIdToOffsetSegment(xlrec.endTruncOff),
- xlrec.startTruncMemb, xlrec.endTruncMemb,
+ (unsigned long long) xlrec.startTruncMemb,
+ (unsigned long long) xlrec.endTruncMemb,
(unsigned long long) MXOffsetToMemberSegment(xlrec.startTruncMemb),
(unsigned long long) MXOffsetToMemberSegment(xlrec.endTruncMemb));
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index cf2b007806..d5464d426c 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -876,8 +876,9 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
U64FromFullTransactionId(checkPoint.nextXid),
checkPoint.nextOid)));
ereport(DEBUG1,
- (errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %u",
- checkPoint.nextMulti, checkPoint.nextMultiOffset)));
+ (errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %llu",
+ checkPoint.nextMulti,
+ (unsigned long long) checkPoint.nextMultiOffset)));
ereport(DEBUG1,
(errmsg_internal("oldest unfrozen transaction ID: %u, in database %u",
checkPoint.oldestXid, checkPoint.oldestXidDB)));
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index 93a05d80ca..43b6727570 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -253,8 +253,8 @@ main(int argc, char *argv[])
ControlFile->checkPointCopy.nextOid);
printf(_("Latest checkpoint's NextMultiXactId: %u\n"),
ControlFile->checkPointCopy.nextMulti);
- printf(_("Latest checkpoint's NextMultiOffset: %u\n"),
- ControlFile->checkPointCopy.nextMultiOffset);
+ printf(_("Latest checkpoint's NextMultiOffset: %llu\n"),
+ (unsigned long long) ControlFile->checkPointCopy.nextMultiOffset);
printf(_("Latest checkpoint's oldestXID: %u\n"),
ControlFile->checkPointCopy.oldestXid);
printf(_("Latest checkpoint's oldestXID's DB: %u\n"),
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c
index ed73607a46..fff401e469 100644
--- a/src/bin/pg_resetwal/pg_resetwal.c
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -737,8 +737,8 @@ PrintControlValues(bool guessed)
ControlFile.checkPointCopy.nextOid);
printf(_("Latest checkpoint's NextMultiXactId: %u\n"),
ControlFile.checkPointCopy.nextMulti);
- printf(_("Latest checkpoint's NextMultiOffset: %u\n"),
- ControlFile.checkPointCopy.nextMultiOffset);
+ printf(_("Latest checkpoint's NextMultiOffset: %llu\n"),
+ (unsigned long long) ControlFile.checkPointCopy.nextMultiOffset);
printf(_("Latest checkpoint's oldestXID: %u\n"),
ControlFile.checkPointCopy.oldestXid);
printf(_("Latest checkpoint's oldestXID's DB: %u\n"),
@@ -809,8 +809,8 @@ PrintNewControlValues(void)
if (set_mxoff != -1)
{
- printf(_("NextMultiOffset: %u\n"),
- ControlFile.checkPointCopy.nextMultiOffset);
+ printf(_("NextMultiOffset: %llu\n"),
+ (unsigned long long) ControlFile.checkPointCopy.nextMultiOffset);
}
if (set_oid != 0)
--
2.43.0
[application/octet-stream] v13-0002-Use-64-bit-multixact-offsets.patch (36.2K, 7-v13-0002-Use-64-bit-multixact-offsets.patch)
download | inline diff:
From f3499102e2893e4b2e24d48975cbbd49385e190f Mon Sep 17 00:00:00 2001
From: Maxim Orlov <[email protected]>
Date: Wed, 6 Mar 2024 11:11:33 +0300
Subject: [PATCH v13 2/7] Use 64-bit multixact offsets.
Author: Maxim Orlov <[email protected]>
---
src/backend/access/transam/multixact.c | 541 ++----------------------
src/backend/access/transam/xlog.c | 2 +-
src/backend/commands/vacuum.c | 2 +-
src/backend/postmaster/autovacuum.c | 4 +-
src/bin/pg_resetwal/pg_resetwal.c | 2 +-
src/bin/pg_resetwal/t/001_basic.pl | 2 +-
src/include/access/multixact.h | 3 +-
src/include/access/multixact_internal.h | 115 +++++
src/include/c.h | 2 +-
9 files changed, 156 insertions(+), 517 deletions(-)
create mode 100644 src/include/access/multixact_internal.h
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 623fc8bdac..cd9db52e95 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -69,6 +69,7 @@
#include "postgres.h"
#include "access/multixact.h"
+#include "access/multixact_internal.h"
#include "access/slru.h"
#include "access/transam.h"
#include "access/twophase.h"
@@ -92,130 +93,14 @@
#include "utils/injection_point.h"
#include "utils/memutils.h"
-
-/*
- * Defines for MultiXactOffset page sizes. A page is the same BLCKSZ as is
- * used everywhere else in Postgres.
- *
- * Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
- * MultiXact page numbering also wraps around at
- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need
- * take no explicit notice of that fact in this module, except when comparing
- * segment and page numbers in TruncateMultiXact (see
- * MultiXactOffsetPagePrecedes).
- */
-
-/* We need four bytes per offset */
-#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
-
-static inline int64
-MultiXactIdToOffsetPage(MultiXactId multi)
-{
- return multi / MULTIXACT_OFFSETS_PER_PAGE;
-}
-
-static inline int
-MultiXactIdToOffsetEntry(MultiXactId multi)
-{
- return multi % MULTIXACT_OFFSETS_PER_PAGE;
-}
-
-static inline int64
-MultiXactIdToOffsetSegment(MultiXactId multi)
-{
- return MultiXactIdToOffsetPage(multi) / SLRU_PAGES_PER_SEGMENT;
-}
-
-/*
- * The situation for members is a bit more complex: we store one byte of
- * additional flag bits for each TransactionId. To do this without getting
- * into alignment issues, we store four bytes of flags, and then the
- * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and
- * are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups
- * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and
- * performance) trumps space efficiency here.
- *
- * Note that the "offset" macros work with byte offset, not array indexes, so
- * arithmetic must be done using "char *" pointers.
- */
-/* We need eight bits per xact, so one xact fits in a byte */
-#define MXACT_MEMBER_BITS_PER_XACT 8
-#define MXACT_MEMBER_FLAGS_PER_BYTE 1
-#define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
-
-/* how many full bytes of flags are there in a group? */
-#define MULTIXACT_FLAGBYTES_PER_GROUP 4
-#define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
- (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
-/* size in bytes of a complete group */
-#define MULTIXACT_MEMBERGROUP_SIZE \
- (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
-#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
-#define MULTIXACT_MEMBERS_PER_PAGE \
- (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
-
/*
- * Because the number of items per page is not a divisor of the last item
- * number (member 0xFFFFFFFF), the last segment does not use the maximum number
- * of pages, and moreover the last used page therein does not use the same
- * number of items as previous pages. (Another way to say it is that the
- * 0xFFFFFFFF member is somewhere in the middle of the last page, so the page
- * has some empty space after that item.)
- *
- * This constant is the number of members in the last page of the last segment.
+ * Multixact members warning threshold.
+ *
+ * If the difference between nextOffset and oldestOffset exceeds this value,
+ * we trigger autovacuum in order to release disk space and, if possible,
+ * reduce table bloat.
*/
-#define MAX_MEMBERS_IN_LAST_MEMBERS_PAGE \
- ((uint32) ((0xFFFFFFFF % MULTIXACT_MEMBERS_PER_PAGE) + 1))
-
-/* page in which a member is to be found */
-static inline int64
-MXOffsetToMemberPage(MultiXactOffset offset)
-{
- return offset / MULTIXACT_MEMBERS_PER_PAGE;
-}
-
-static inline int64
-MXOffsetToMemberSegment(MultiXactOffset offset)
-{
- return MXOffsetToMemberPage(offset) / SLRU_PAGES_PER_SEGMENT;
-}
-
-/* Location (byte offset within page) of flag word for a given member */
-static inline int
-MXOffsetToFlagsOffset(MultiXactOffset offset)
-{
- MultiXactOffset group = offset / MULTIXACT_MEMBERS_PER_MEMBERGROUP;
- int grouponpg = group % MULTIXACT_MEMBERGROUPS_PER_PAGE;
- int byteoff = grouponpg * MULTIXACT_MEMBERGROUP_SIZE;
-
- return byteoff;
-}
-
-static inline int
-MXOffsetToFlagsBitShift(MultiXactOffset offset)
-{
- int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
- int bshift = member_in_group * MXACT_MEMBER_BITS_PER_XACT;
-
- return bshift;
-}
-
-/* Location (byte offset within page) of TransactionId of given member */
-static inline int
-MXOffsetToMemberOffset(MultiXactOffset offset)
-{
- int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
-
- return MXOffsetToFlagsOffset(offset) +
- MULTIXACT_FLAGBYTES_PER_GROUP +
- member_in_group * sizeof(TransactionId);
-}
-
-/* Multixact members wraparound thresholds. */
-#define MULTIXACT_MEMBER_SAFE_THRESHOLD (MaxMultiXactOffset / 2)
-#define MULTIXACT_MEMBER_DANGER_THRESHOLD \
- (MaxMultiXactOffset - MaxMultiXactOffset / 4)
+#define MULTIXACT_MEMBER_AUTOVAC_THRESHOLD UINT64CONST(0xFFFFFFFF)
static inline MultiXactId
PreviousMultiXactId(MultiXactId multi)
@@ -260,11 +145,9 @@ typedef struct MultiXactStateData
/*
* Oldest multixact offset that is potentially referenced by a multixact
- * referenced by a relation. We don't always know this value, so there's
- * a flag here to indicate whether or not we currently do.
+ * referenced by a relation.
*/
MultiXactOffset oldestOffset;
- bool oldestOffsetKnown;
/* support for anti-wraparound measures */
MultiXactId multiVacLimit;
@@ -272,9 +155,6 @@ typedef struct MultiXactStateData
MultiXactId multiStopLimit;
MultiXactId multiWrapLimit;
- /* support for members anti-wraparound measures */
- MultiXactOffset offsetStopLimit; /* known if oldestOffsetKnown */
-
/*
* This is used to sleep until a multixact offset is written when we want
* to create the next one.
@@ -409,10 +289,8 @@ static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
MultiXactOffset offset2);
static void ExtendMultiXactOffset(MultiXactId multi);
static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
-static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
- MultiXactOffset start, uint32 distance);
static bool SetOffsetVacuumLimit(bool is_startup);
-static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
+static MultiXactOffset find_multixact_start(MultiXactId multi);
static void WriteMZeroPageXlogRec(int64 pageno, uint8 info);
static void WriteMTruncateXlogRec(Oid oldestMultiDB,
MultiXactId startTruncOff,
@@ -1054,9 +932,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
* against catastrophic data loss due to multixact wraparound. The basic
* rules are:
*
- * If we're past multiVacLimit or the safe threshold for member storage
- * space, or we don't know what the safe threshold for member storage is,
- * start trying to force autovacuum cycles.
+ * If we're past multiVacLimit, start trying to force autovacuum cycles.
* If we're past multiWarnLimit, start issuing warnings.
* If we're past multiStopLimit, refuse to create new MultiXactIds.
*
@@ -1151,90 +1027,10 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
ExtendMultiXactOffset(result);
/*
- * Reserve the members space, similarly to above. Also, be careful not to
- * return zero as the starting offset for any multixact. See
- * GetMultiXactIdMembers() for motivation.
+ * Reserve the members space, similarly to above.
*/
nextOffset = MultiXactState->nextOffset;
- if (nextOffset == 0)
- {
- *offset = 1;
- nmembers++; /* allocate member slot 0 too */
- }
- else
- *offset = nextOffset;
-
- /*----------
- * Protect against overrun of the members space as well, with the
- * following rules:
- *
- * If we're past offsetStopLimit, refuse to generate more multis.
- * If we're close to offsetStopLimit, emit a warning.
- *
- * Arbitrarily, we start emitting warnings when we're 20 segments or less
- * from offsetStopLimit.
- *
- * Note we haven't updated the shared state yet, so if we fail at this
- * point, the multixact ID we grabbed can still be used by the next guy.
- *
- * Note that there is no point in forcing autovacuum runs here: the
- * multixact freeze settings would have to be reduced for that to have any
- * effect.
- *----------
- */
-#define OFFSET_WARN_SEGMENTS 20
- if (MultiXactState->oldestOffsetKnown &&
- MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset,
- nmembers))
- {
- /* see comment in the corresponding offsets wraparound case */
- SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("multixact \"members\" limit exceeded"),
- errdetail_plural("This command would create a multixact with %u members, but the remaining space is only enough for %u member.",
- "This command would create a multixact with %u members, but the remaining space is only enough for %u members.",
- MultiXactState->offsetStopLimit - nextOffset - 1,
- nmembers,
- MultiXactState->offsetStopLimit - nextOffset - 1),
- errhint("Execute a database-wide VACUUM in database with OID %u with reduced \"vacuum_multixact_freeze_min_age\" and \"vacuum_multixact_freeze_table_age\" settings.",
- MultiXactState->oldestMultiXactDB)));
- }
-
- /*
- * Check whether we should kick autovacuum into action, to prevent members
- * wraparound. NB we use a much larger window to trigger autovacuum than
- * just the warning limit. The warning is just a measure of last resort -
- * this is in line with GetNewTransactionId's behaviour.
- */
- if (!MultiXactState->oldestOffsetKnown ||
- (MultiXactState->nextOffset - MultiXactState->oldestOffset
- > MULTIXACT_MEMBER_SAFE_THRESHOLD))
- {
- /*
- * To avoid swamping the postmaster with signals, we issue the autovac
- * request only when crossing a segment boundary. With default
- * compilation settings that's roughly after 50k members. This still
- * gives plenty of chances before we get into real trouble.
- */
- if ((MXOffsetToMemberPage(nextOffset) / SLRU_PAGES_PER_SEGMENT) !=
- (MXOffsetToMemberPage(nextOffset + nmembers) / SLRU_PAGES_PER_SEGMENT))
- SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
- }
-
- if (MultiXactState->oldestOffsetKnown &&
- MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit,
- nextOffset,
- nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS))
- ereport(WARNING,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg_plural("database with OID %u must be vacuumed before %d more multixact member is used",
- "database with OID %u must be vacuumed before %d more multixact members are used",
- MultiXactState->offsetStopLimit - nextOffset + nmembers,
- MultiXactState->oldestMultiXactDB,
- MultiXactState->offsetStopLimit - nextOffset + nmembers),
- errhint("Execute a database-wide VACUUM in that database with reduced \"vacuum_multixact_freeze_min_age\" and \"vacuum_multixact_freeze_table_age\" settings.")));
+ *offset = nextOffset;
ExtendMultiXactMember(nextOffset, nmembers);
@@ -2620,22 +2416,9 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
}
/*
- * Compute the number of items till end of current page. Careful: if
- * addition of unsigned ints wraps around, we're at the last page of
- * the last segment; since that page holds a different number of items
- * than other pages, we need to do it differently.
+ * Compute the number of items till end of current page.
*/
- if (offset + MAX_MEMBERS_IN_LAST_MEMBERS_PAGE < offset)
- {
- /*
- * This is the last page of the last segment; we can compute the
- * number of items left to allocate in it without modulo
- * arithmetic.
- */
- difference = MaxMultiXactOffset - offset + 1;
- }
- else
- difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
+ difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
/*
* Advance to next page, taking care to properly handle the wraparound
@@ -2701,15 +2484,13 @@ GetOldestMultiXactId(void)
}
/*
- * Determine how aggressively we need to vacuum in order to prevent member
- * wraparound.
+ * Determine whether autovacuum is needed for multixact member space.
*
* To do so determine what's the oldest member offset and install the limit
* info in MultiXactState, where it can be used to prevent overrun of old data
* in the members SLRU area.
*
- * The return value is true if emergency autovacuum is required and false
- * otherwise.
+ * The return value is true if autovacuum is required and false otherwise.
*/
static bool
SetOffsetVacuumLimit(bool is_startup)
@@ -2717,12 +2498,7 @@ SetOffsetVacuumLimit(bool is_startup)
MultiXactId oldestMultiXactId;
MultiXactId nextMXact;
MultiXactOffset oldestOffset = 0; /* placate compiler */
- MultiXactOffset prevOldestOffset;
MultiXactOffset nextOffset;
- bool oldestOffsetKnown = false;
- bool prevOldestOffsetKnown;
- MultiXactOffset offsetStopLimit = 0;
- MultiXactOffset prevOffsetStopLimit;
/*
* NB: Have to prevent concurrent truncation, we might otherwise try to
@@ -2735,9 +2511,6 @@ SetOffsetVacuumLimit(bool is_startup)
oldestMultiXactId = MultiXactState->oldestMultiXactId;
nextMXact = MultiXactState->nextMXact;
nextOffset = MultiXactState->nextOffset;
- prevOldestOffsetKnown = MultiXactState->oldestOffsetKnown;
- prevOldestOffset = MultiXactState->oldestOffset;
- prevOffsetStopLimit = MultiXactState->offsetStopLimit;
Assert(MultiXactState->finishedStartup);
LWLockRelease(MultiXactGenLock);
@@ -2755,139 +2528,31 @@ SetOffsetVacuumLimit(bool is_startup)
* offset.
*/
oldestOffset = nextOffset;
- oldestOffsetKnown = true;
}
else
- {
- /*
- * Figure out where the oldest existing multixact's offsets are
- * stored. Due to bugs in early release of PostgreSQL 9.3.X and 9.4.X,
- * the supposedly-earliest multixact might not really exist. We are
- * careful not to fail in that case.
- */
- oldestOffsetKnown =
- find_multixact_start(oldestMultiXactId, &oldestOffset);
-
- if (oldestOffsetKnown)
- ereport(DEBUG1,
- (errmsg_internal("oldest MultiXactId member is at offset %u",
- oldestOffset)));
- else
- ereport(LOG,
- (errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %u does not exist on disk",
- oldestMultiXactId)));
- }
+ oldestOffset = find_multixact_start(oldestMultiXactId);
LWLockRelease(MultiXactTruncationLock);
- /*
- * If we can, compute limits (and install them MultiXactState) to prevent
- * overrun of old data in the members SLRU area. We can only do so if the
- * oldest offset is known though.
- */
- if (oldestOffsetKnown)
- {
- /* move back to start of the corresponding segment */
- offsetStopLimit = oldestOffset - (oldestOffset %
- (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT));
-
- /* always leave one segment before the wraparound point */
- offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT);
-
- if (!prevOldestOffsetKnown && !is_startup)
- ereport(LOG,
- (errmsg("MultiXact member wraparound protections are now enabled")));
-
- ereport(DEBUG1,
- (errmsg_internal("MultiXact member stop limit is now %u based on MultiXact %u",
- offsetStopLimit, oldestMultiXactId)));
- }
- else if (prevOldestOffsetKnown)
- {
- /*
- * If we failed to get the oldest offset this time, but we have a
- * value from a previous pass through this function, use the old
- * values rather than automatically forcing an emergency autovacuum
- * cycle again.
- */
- oldestOffset = prevOldestOffset;
- oldestOffsetKnown = true;
- offsetStopLimit = prevOffsetStopLimit;
- }
-
/* Install the computed values */
LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
MultiXactState->oldestOffset = oldestOffset;
- MultiXactState->oldestOffsetKnown = oldestOffsetKnown;
- MultiXactState->offsetStopLimit = offsetStopLimit;
LWLockRelease(MultiXactGenLock);
/*
- * Do we need an emergency autovacuum? If we're not sure, assume yes.
- */
- return !oldestOffsetKnown ||
- (nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD);
-}
-
-/*
- * Return whether adding "distance" to "start" would move past "boundary".
- *
- * We use this to determine whether the addition is "wrapping around" the
- * boundary point, hence the name. The reason we don't want to use the regular
- * 2^31-modulo arithmetic here is that we want to be able to use the whole of
- * the 2^32-1 space here, allowing for more multixacts than would fit
- * otherwise.
- */
-static bool
-MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start,
- uint32 distance)
-{
- MultiXactOffset finish;
-
- /*
- * Note that offset number 0 is not used (see GetMultiXactIdMembers), so
- * if the addition wraps around the UINT_MAX boundary, skip that value.
- */
- finish = start + distance;
- if (finish < start)
- finish++;
-
- /*-----------------------------------------------------------------------
- * When the boundary is numerically greater than the starting point, any
- * value numerically between the two is not wrapped:
- *
- * <----S----B---->
- * [---) = F wrapped past B (and UINT_MAX)
- * [---) = F not wrapped
- * [----] = F wrapped past B
- *
- * When the boundary is numerically less than the starting point (i.e. the
- * UINT_MAX wraparound occurs somewhere in between) then all values in
- * between are wrapped:
- *
- * <----B----S---->
- * [---) = F not wrapped past B (but wrapped past UINT_MAX)
- * [---) = F wrapped past B (and UINT_MAX)
- * [----] = F not wrapped
- *-----------------------------------------------------------------------
+ * Do we need autovacuum?
*/
- if (start < boundary)
- return finish >= boundary || finish < start;
- else
- return finish >= boundary && finish < start;
+ return (nextOffset - oldestOffset > MULTIXACT_MEMBER_AUTOVAC_THRESHOLD);
}
/*
* Find the starting offset of the given MultiXactId.
*
- * Returns false if the file containing the multi does not exist on disk.
- * Otherwise, returns true and sets *result to the starting member offset.
- *
* This function does not prevent concurrent truncation, so if that's
* required, the caller has to protect against that.
*/
-static bool
-find_multixact_start(MultiXactId multi, MultiXactOffset *result)
+static MultiXactOffset
+find_multixact_start(MultiXactId multi)
{
MultiXactOffset offset;
int64 pageno;
@@ -2900,15 +2565,6 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
pageno = MultiXactIdToOffsetPage(multi);
entryno = MultiXactIdToOffsetEntry(multi);
- /*
- * Write out dirty data, so PhysicalPageExists can work correctly.
- */
- SimpleLruWriteAll(MultiXactOffsetCtl, true);
- SimpleLruWriteAll(MultiXactMemberCtl, true);
-
- if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
- return false;
-
/* lock is acquired by SimpleLruReadPage_ReadOnly */
slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, multi);
offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
@@ -2916,102 +2572,7 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
offset = *offptr;
LWLockRelease(SimpleLruGetBankLock(MultiXactOffsetCtl, pageno));
- *result = offset;
- return true;
-}
-
-/*
- * Determine how many multixacts, and how many multixact members, currently
- * exist. Return false if unable to determine.
- */
-static bool
-ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members)
-{
- MultiXactOffset nextOffset;
- MultiXactOffset oldestOffset;
- MultiXactId oldestMultiXactId;
- MultiXactId nextMultiXactId;
- bool oldestOffsetKnown;
-
- LWLockAcquire(MultiXactGenLock, LW_SHARED);
- nextOffset = MultiXactState->nextOffset;
- oldestMultiXactId = MultiXactState->oldestMultiXactId;
- nextMultiXactId = MultiXactState->nextMXact;
- oldestOffset = MultiXactState->oldestOffset;
- oldestOffsetKnown = MultiXactState->oldestOffsetKnown;
- LWLockRelease(MultiXactGenLock);
-
- if (!oldestOffsetKnown)
- return false;
-
- *members = nextOffset - oldestOffset;
- *multixacts = nextMultiXactId - oldestMultiXactId;
- return true;
-}
-
-/*
- * Multixact members can be removed once the multixacts that refer to them
- * are older than every datminmxid. autovacuum_multixact_freeze_max_age and
- * vacuum_multixact_freeze_table_age work together to make sure we never have
- * too many multixacts; we hope that, at least under normal circumstances,
- * this will also be sufficient to keep us from using too many offsets.
- * However, if the average multixact has many members, we might exhaust the
- * members space while still using few enough members that these limits fail
- * to trigger relminmxid advancement by VACUUM. At that point, we'd have no
- * choice but to start failing multixact-creating operations with an error.
- *
- * To prevent that, if more than a threshold portion of the members space is
- * used, we effectively reduce autovacuum_multixact_freeze_max_age and
- * to a value just less than the number of multixacts in use. We hope that
- * this will quickly trigger autovacuuming on the table or tables with the
- * oldest relminmxid, thus allowing datminmxid values to advance and removing
- * some members.
- *
- * As the fraction of the member space currently in use grows, we become
- * more aggressive in clamping this value. That not only causes autovacuum
- * to ramp up, but also makes any manual vacuums the user issues more
- * aggressive. This happens because vacuum_get_cutoffs() will clamp the
- * freeze table and the minimum freeze age cutoffs based on the effective
- * autovacuum_multixact_freeze_max_age this function returns. In the worst
- * case, we'll claim the freeze_max_age to zero, and every vacuum of any
- * table will freeze every multixact.
- */
-int
-MultiXactMemberFreezeThreshold(void)
-{
- MultiXactOffset members;
- uint32 multixacts;
- uint32 victim_multixacts;
- double fraction;
- int result;
-
- /* If we can't determine member space utilization, assume the worst. */
- if (!ReadMultiXactCounts(&multixacts, &members))
- return 0;
-
- /* If member space utilization is low, no special action is required. */
- if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD)
- return autovacuum_multixact_freeze_max_age;
-
- /*
- * Compute a target for relminmxid advancement. The number of multixacts
- * we try to eliminate from the system is based on how far we are past
- * MULTIXACT_MEMBER_SAFE_THRESHOLD.
- */
- fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) /
- (MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD);
- victim_multixacts = multixacts * fraction;
-
- /* fraction could be > 1.0, but lowest possible freeze age is zero */
- if (victim_multixacts > multixacts)
- return 0;
- result = multixacts - victim_multixacts;
-
- /*
- * Clamp to autovacuum_multixact_freeze_max_age, so that we never make
- * autovacuum less aggressive than it would otherwise be.
- */
- return Min(result, autovacuum_multixact_freeze_max_age);
+ return offset;
}
typedef struct mxtruncinfo
@@ -3039,37 +2600,13 @@ SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data
/*
- * Delete members segments [oldest, newOldest)
- *
- * The members SLRU can, in contrast to the offsets one, be filled to almost
- * the full range at once. This means SimpleLruTruncate() can't trivially be
- * used - instead the to-be-deleted range is computed using the offsets
- * SLRU. C.f. TruncateMultiXact().
+ * Delete members segments before the newOldestOffset.
*/
static void
-PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset)
+PerformMembersTruncation(MultiXactOffset newOldestOffset)
{
- const int64 maxsegment = MXOffsetToMemberSegment(MaxMultiXactOffset);
- int64 startsegment = MXOffsetToMemberSegment(oldestOffset);
- int64 endsegment = MXOffsetToMemberSegment(newOldestOffset);
- int64 segment = startsegment;
-
- /*
- * Delete all the segments but the last one. The last segment can still
- * contain, possibly partially, valid data.
- */
- while (segment != endsegment)
- {
- elog(DEBUG2, "truncating multixact members segment %llx",
- (unsigned long long) segment);
- SlruDeleteSegment(MultiXactMemberCtl, segment);
-
- /* move to next segment, handling wraparound correctly */
- if (segment == maxsegment)
- segment = 0;
- else
- segment += 1;
- }
+ SimpleLruTruncate(MultiXactMemberCtl,
+ MXOffsetToMemberPage(newOldestOffset));
}
/*
@@ -3174,23 +2711,15 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
/*
* First, compute the safe truncation point for MultiXactMember. This is
* the starting offset of the oldest multixact.
- *
- * Hopefully, find_multixact_start will always work here, because we've
- * already checked that it doesn't precede the earliest MultiXact on disk.
- * But if it fails, don't truncate anything, and log a message.
*/
if (oldestMulti == nextMulti)
{
/* there are NO MultiXacts */
oldestOffset = nextOffset;
}
- else if (!find_multixact_start(oldestMulti, &oldestOffset))
+ else
{
- ereport(LOG,
- (errmsg("oldest MultiXact %u not found, earliest MultiXact %u, skipping truncation",
- oldestMulti, earliest)));
- LWLockRelease(MultiXactTruncationLock);
- return;
+ oldestOffset = find_multixact_start(oldestMulti);
}
/*
@@ -3202,13 +2731,9 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
/* there are NO MultiXacts */
newOldestOffset = nextOffset;
}
- else if (!find_multixact_start(newOldestMulti, &newOldestOffset))
+ else
{
- ereport(LOG,
- (errmsg("cannot truncate up to MultiXact %u because it does not exist on disk, skipping truncation",
- newOldestMulti)));
- LWLockRelease(MultiXactTruncationLock);
- return;
+ newOldestOffset = find_multixact_start(newOldestMulti);
}
elog(DEBUG1, "performing multixact truncation: "
@@ -3258,7 +2783,7 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
LWLockRelease(MultiXactGenLock);
/* First truncate members */
- PerformMembersTruncation(oldestOffset, newOldestOffset);
+ PerformMembersTruncation(newOldestOffset);
/* Then offsets */
PerformOffsetsTruncation(oldestMulti, newOldestMulti);
@@ -3345,7 +2870,7 @@ MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
static bool
MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
{
- int32 diff = (int32) (offset1 - offset2);
+ int64 diff = (int64) (offset1 - offset2);
return (diff < 0);
}
@@ -3492,7 +3017,7 @@ multixact_redo(XLogReaderState *record)
*/
SetMultiXactIdLimit(xlrec.endTruncOff, xlrec.oldestMultiDB, false);
- PerformMembersTruncation(xlrec.startTruncMemb, xlrec.endTruncMemb);
+ PerformMembersTruncation(xlrec.endTruncMemb);
/*
* During XLOG replay, latest_page_number isn't necessarily set up
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index bf3dbda901..a813a090fa 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5083,7 +5083,7 @@ BootStrapXLOG(uint32 data_checksum_version)
FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
checkPoint.nextOid = FirstGenbkiObjectId;
checkPoint.nextMulti = FirstMultiXactId;
- checkPoint.nextMultiOffset = 0;
+ checkPoint.nextMultiOffset = 1;
checkPoint.oldestXid = FirstNormalTransactionId;
checkPoint.oldestXidDB = Template1DbOid;
checkPoint.oldestMulti = FirstMultiXactId;
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index e6745e6145..c96fbf004d 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1134,7 +1134,7 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
* normally autovacuum_multixact_freeze_max_age, but may be less if we are
* short of multixact member space.
*/
- effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
+ effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age;
/*
* Almost ready to set freeze output parameters; check if OldestXmin or
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 0ab921a169..ed5fc09c38 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -1134,7 +1134,7 @@ do_start_worker(void)
/* Also determine the oldest datminmxid we will consider. */
recentMulti = ReadNextMultiXactId();
- multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
+ multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age;
if (multiForceLimit < FirstMultiXactId)
multiForceLimit -= FirstMultiXactId;
@@ -1922,7 +1922,7 @@ do_autovacuum(void)
* normally autovacuum_multixact_freeze_max_age, but may be less if we are
* short of multixact member space.
*/
- effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
+ effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age;
/*
* Find the pg_database entry and select the default freeze ages. We use
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c
index fff401e469..4ad64cf1ed 100644
--- a/src/bin/pg_resetwal/pg_resetwal.c
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -264,7 +264,7 @@ main(int argc, char *argv[])
case 'O':
errno = 0;
- set_mxoff = strtoul(optarg, &endptr, 0);
+ set_mxoff = strtou64(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-O");
diff --git a/src/bin/pg_resetwal/t/001_basic.pl b/src/bin/pg_resetwal/t/001_basic.pl
index 323cd483cf..e107646875 100644
--- a/src/bin/pg_resetwal/t/001_basic.pl
+++ b/src/bin/pg_resetwal/t/001_basic.pl
@@ -207,7 +207,7 @@ push @cmd,
sprintf("%d,%d", hex($files[0]) == 0 ? 3 : hex($files[0]), hex($files[-1]));
@files = get_slru_files('pg_multixact/offsets');
-$mult = 32 * $blcksz / 4;
+$mult = 32 * $blcksz / 8;
# --multixact-ids argument is "new,old"
push @cmd,
'--multixact-ids' => sprintf("%d,%d",
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 4e6b0eec2f..5ee632dfe6 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -27,7 +27,7 @@
#define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId)
-#define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF)
+#define MaxMultiXactOffset UINT64CONST(0xFFFFFFFFFFFFFFFF)
/*
* Possible multixact lock modes ("status"). The first four modes are for
@@ -143,7 +143,6 @@ extern void MultiXactSetNextMXact(MultiXactId nextMulti,
extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
MultiXactOffset minMultiOffset);
extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB);
-extern int MultiXactMemberFreezeThreshold(void);
extern void multixact_twophase_recover(TransactionId xid, uint16 info,
void *recdata, uint32 len);
diff --git a/src/include/access/multixact_internal.h b/src/include/access/multixact_internal.h
new file mode 100644
index 0000000000..39e74a21c7
--- /dev/null
+++ b/src/include/access/multixact_internal.h
@@ -0,0 +1,115 @@
+/*
+ * multixact_internal.h
+ *
+ * Internal definitions for the on-disk format of multixact manager.
+ *
+ * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/multixact_internal.h
+ */
+#ifndef MULTIXACT_INTERNAL_H
+#define MULTIXACT_INTERNAL_H
+
+/* FIXME: had to duplicate this */
+#define SLRU_PAGES_PER_SEGMENT 32
+
+/*
+ * Defines for MultiXactOffset page sizes. A page is the same BLCKSZ as is
+ * used everywhere else in Postgres.
+ */
+
+/* We need eight bytes per offset (MultiXactOffset is 64 bits wide) */
+#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
+
+static inline int64
+MultiXactIdToOffsetPage(MultiXactId multi)
+{
+ return multi / MULTIXACT_OFFSETS_PER_PAGE;
+}
+
+static inline int
+MultiXactIdToOffsetEntry(MultiXactId multi)
+{
+ return multi % MULTIXACT_OFFSETS_PER_PAGE;
+}
+
+static inline int64
+MultiXactIdToOffsetSegment(MultiXactId multi)
+{
+ return MultiXactIdToOffsetPage(multi) / SLRU_PAGES_PER_SEGMENT;
+}
+
+/*
+ * The situation for members is a bit more complex: we store one byte of
+ * additional flag bits for each TransactionId. To do this without getting
+ * into alignment issues, we store four bytes of flags, and then the
+ * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and
+ * are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups
+ * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and
+ * performance) trumps space efficiency here.
+ *
+ * Note that the "offset" macros work with byte offset, not array indexes, so
+ * arithmetic must be done using "char *" pointers.
+ */
+/* We need eight bits per xact, so one xact fits in a byte */
+#define MXACT_MEMBER_BITS_PER_XACT 8
+#define MXACT_MEMBER_FLAGS_PER_BYTE 1
+#define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
+
+/* how many full bytes of flags are there in a group? */
+#define MULTIXACT_FLAGBYTES_PER_GROUP 4
+#define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
+ (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
+/* size in bytes of a complete group */
+#define MULTIXACT_MEMBERGROUP_SIZE \
+ (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
+#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
+#define MULTIXACT_MEMBERS_PER_PAGE \
+ (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
+
+/* page in which a member is to be found */
+static inline int64
+MXOffsetToMemberPage(MultiXactOffset offset)
+{
+ return offset / MULTIXACT_MEMBERS_PER_PAGE;
+}
+
+static inline int64
+MXOffsetToMemberSegment(MultiXactOffset offset)
+{
+ return MXOffsetToMemberPage(offset) / SLRU_PAGES_PER_SEGMENT;
+}
+
+/* Location (byte offset within page) of flag word for a given member */
+static inline int
+MXOffsetToFlagsOffset(MultiXactOffset offset)
+{
+ MultiXactOffset group = offset / MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+ int grouponpg = group % MULTIXACT_MEMBERGROUPS_PER_PAGE;
+ int byteoff = grouponpg * MULTIXACT_MEMBERGROUP_SIZE;
+
+ return byteoff;
+}
+
+static inline int
+MXOffsetToFlagsBitShift(MultiXactOffset offset)
+{
+ int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+ int bshift = member_in_group * MXACT_MEMBER_BITS_PER_XACT;
+
+ return bshift;
+}
+
+/* Location (byte offset within page) of TransactionId of given member */
+static inline int
+MXOffsetToMemberOffset(MultiXactOffset offset)
+{
+ int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+
+ return MXOffsetToFlagsOffset(offset) +
+ MULTIXACT_FLAGBYTES_PER_GROUP +
+ member_in_group * sizeof(TransactionId);
+}
+
+#endif /* MULTIXACT_INTERNAL_H */
diff --git a/src/include/c.h b/src/include/c.h
index a14c631516..318194f78d 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -618,7 +618,7 @@ typedef uint32 SubTransactionId;
/* MultiXactId must be equivalent to TransactionId, to fit in t_xmax */
typedef TransactionId MultiXactId;
-typedef uint32 MultiXactOffset;
+typedef uint64 MultiXactOffset;
typedef uint32 CommandId;
--
2.43.0
[text/plain] v13-0006-TEST-try-to-replicate-buggy-oldest-offset.patch.txt (2.1K, 8-v13-0006-TEST-try-to-replicate-buggy-oldest-offset.patch.txt)
download | inline diff:
From 4a2e64b44cf8ec22a264d8fa495432f535482fb4 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <[email protected]>
Date: Fri, 27 Dec 2024 19:39:58 +0300
Subject: [PATCH v13 6/7] TEST: try to replicate buggy oldest offset
---
src/bin/pg_upgrade/t/005_offset.pl | 59 ++++++++++++++++++++++++++++++
1 file changed, 59 insertions(+)
diff --git a/src/bin/pg_upgrade/t/005_offset.pl b/src/bin/pg_upgrade/t/005_offset.pl
index df84186de4..2d91d101fa 100644
--- a/src/bin/pg_upgrade/t/005_offset.pl
+++ b/src/bin/pg_upgrade/t/005_offset.pl
@@ -305,6 +305,8 @@ my @TESTS = (
100, 101, 102, 103, 104, 105, 106,
# self upgrade
1000,
+ # buggy
+ 2000
);
# =============================================================================
@@ -560,4 +562,61 @@ SKIP:
ok(1, "TEST $TEST_NO PASSED");
}
+# =============================================================================
+# Buggy
+# =============================================================================
+
+SKIP:
+{
+ my $TEST_NO = 2000;
+ skip "do not test case $TEST_NO", 1
+ unless ( grep( /^$TEST_NO$/, @TESTS ) );
+
+ if (!defined($ENV{oldinstall}))
+ {
+ die "oldinstall is not defined";
+ }
+
+ my $dbname = 'buggy';
+ my $newnode = PostgreSQL::Test::Cluster->new("new_$dbname");
+ my $oldnode = PostgreSQL::Test::Cluster->new("old_$dbname",
+ install_path => $ENV{oldinstall});
+ $newnode->init;
+ $oldnode->init(force_initdb => 1);
+
+ $oldnode->append_conf('postgresql.conf', q(
+ autovacuum = off
+ max_prepared_transactions = 2
+ fsync = off
+ ));
+ $oldnode->start;
+
+ mxid_gen2($oldnode, 'FOO', 1.25);
+ mxid_gen2($oldnode, 'BAR', 1.25);
+
+ $oldnode->safe_psql('postgres', q(
+ DROP TABLE BAR;
+ CHECKPOINT;
+ ));
+ $oldnode->stop;
+
+ unlink($oldnode->data_dir . "/pg_multixact/offsets/0000");
+
+ command_ok(
+ [
+ 'pg_upgrade', '--no-sync',
+ '-d', $oldnode->data_dir,
+ '-D', $newnode->data_dir,
+ '-b', $oldnode->config_data('--bindir'),
+ '-B', $newnode->config_data('--bindir'),
+ '-s', $newnode->host,
+ '-p', $oldnode->port,
+ '-P', $newnode->port,
+ '--copy'
+ ],
+ 'run of pg_upgrade');
+
+ ok(1, "TEST $TEST_NO PASSED");
+}
+
done_testing();
--
2.43.0
[text/plain] v13-0007-TEST-bump-catver.patch.txt (1.1K, 9-v13-0007-TEST-bump-catver.patch.txt)
download | inline diff:
From 52b7019b4b964bd221de2c891d2b1f073b7465bf Mon Sep 17 00:00:00 2001
From: Maxim Orlov <[email protected]>
Date: Wed, 13 Nov 2024 16:34:34 +0300
Subject: [PATCH v13 7/7] TEST: bump catver
---
src/bin/pg_upgrade/pg_upgrade.h | 2 +-
src/include/catalog/catversion.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 9b3d645b08..0fd791c442 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -119,7 +119,7 @@ extern char *output_files[];
*
* XXX: should be changed to the actual CATALOG_VERSION_NO on commit.
*/
-#define MULTIXACTOFFSET_FORMATCHANGE_CAT_VER 202409041
+#define MULTIXACTOFFSET_FORMATCHANGE_CAT_VER 202501283
/*
* large object chunk size added to pg_controldata,
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 6edaa20368..dfcb940501 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202501282
+#define CATALOG_VERSION_NO 202501283
#endif
--
2.43.0
view thread (79+ messages) latest in thread
reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Reply to all the recipients using the --to and --cc options:
reply via email
To: [email protected]
Cc: [email protected], [email protected], [email protected], [email protected], [email protected]
Subject: Re: POC: make mxidoff 64 bits
In-Reply-To: <CACG=ezYbYO_KHWdeDedbDcY0tOS0JfaqBxG3=bG5+DdsDK4MpQ@mail.gmail.com>
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox