MIME-Version: 1.0
References: 
 <CAB-JLwbBFNuASyEnZWP0Tck9uNkthBZqi6WoXNevUT6+mV8XmA@mail.gmail.com>
In-Reply-To: 
 <CAB-JLwbBFNuASyEnZWP0Tck9uNkthBZqi6WoXNevUT6+mV8XmA@mail.gmail.com>
From: Marcos Pegoraro <marcos@f10.com.br>
Date: Sun, 7 Dec 2025 11:22:27 -0300
Message-ID: 
 <CAB-JLwYdb6oHaQfLg87ou2QDgvNUTp1PrOXGiNbVtNdgzR5mGQ@mail.gmail.com>
Subject: Re: Initial COPY of Logical Replication is too slow
To: PostgreSQL Hackers <pgsql-hackers@lists.postgresql.org>
Content-Type: multipart/alternative; boundary="000000000000ac04f606455d6a6a"
Archived-At: 
 <https://www.postgresql.org/message-id/CAB-JLwYdb6oHaQfLg87ou2QDgvNUTp1PrOXGiNbVtNdgzR5mGQ%40mail.gmail.com>
Precedence: bulk

--000000000000ac04f606455d6a6a
Content-Type: text/plain; charset="UTF-8"

You can see how much time a subscriber will need to get all files which
were added with this.
The first run creates 10 thousand tables and publishes them, so you can
measure how long it takes for all tables to become Ready in
pg_subscription_rel.
Run it again to add 10 thousand more tables and you will see the time
increase: more tables, more time.

This is just to show that if you create a subscription with a high number
of tables, it spends more time running selects on pg_get_publication_tables
than actually copying data. In my use case I have 50 thousand tables, and
it takes 5 seconds every time it needs to get the next table to copy.

-- Create an empty publication; test schemas are added to it below.
create publication my_pub;

--Run the following 3 anonymous blocks to create schemas and tables and to
--add them to the publication.
--Three separate blocks are needed because I cannot create a table in a
--schema that is not committed yet. The same applies to the publication.
-- Creates Schemas_Add new schemas named test_NNNNN. Numbering continues
-- after the highest existing test_NNNNN schema, so the block can be run
-- repeatedly to add more schemas.
do $$
declare
  Schemas_Add integer = 100;  -- how many schemas to create in this run
  Actual_Schema text;
begin
  for Actual_Schema in
    select 'test_' || to_char(last_used.num + g.n, 'FM00000')
    from (
      -- Anchored pattern: only names that are exactly test_<digits> count,
      -- so unrelated schemas (e.g. latest_2024) cannot skew the numbering
      -- or break the integer cast.
      select coalesce(
               max(substring(nspname, '^test_(\d+)$')::integer), 0) as num
      from pg_namespace
      where nspname ~ '^test_\d+$'
    ) as last_used
    cross join generate_series(1, Schemas_Add) as g(n)
  loop
    -- %I quotes the identifier when necessary.
    execute format('create schema %I', Actual_Schema);
  end loop;
end;$$;

-- Fills every still-empty test_NNNNN schema with Tables_Add tables named
-- test_00001 .. test_NNNNN, each holding between 0 and 10 integer rows.
-- NOTE(review): two-argument random(min, max) presumably requires
-- PostgreSQL 17 or later -- confirm for the target server.
do $$
declare
  Tables_Add integer = 100;  -- tables to create per schema
  Actual_Schema text;
begin
  for Actual_Schema in
    select n.nspname
    from pg_namespace as n
    -- Anchored so that only schemas named exactly test_<digits> are used.
    where n.nspname ~ '^test_\d+$'
      -- Skip schemas that already contain any relation.
      and not exists (select
                      from pg_class as c
                      where c.relnamespace = n.oid)
  loop
    for j in 1..Tables_Add loop
      -- %I quotes schema and table identifiers when necessary.
      execute format(
        'create table %I.%I as select '
        || 'generate_series(1,random(0,10))::integer id;',
        Actual_Schema, 'test_' || to_char(j, 'FM00000'));
    end loop;
  end loop;
end;$$;

-- Adds every test_NNNNN schema that is not yet listed in
-- pg_publication_namespace to publication my_pub, in one ALTER statement.
do $$
declare
  Schemas_To_Add text;  -- comma-separated, already-quoted schema names
begin
  select string_agg(quote_ident(n.nspname), ',')
    into Schemas_To_Add
  from pg_namespace as n
  where n.nspname ~ '^test_\d+$'
    and not exists (select
                    from pg_publication_namespace as pn
                    where pn.pnnspid = n.oid);

  -- string_agg yields NULL when no schema qualifies; without this guard
  -- the generated statement would end in "in schema ;" and fail to parse.
  if Schemas_To_Add is null then
    raise notice 'no new test schemas to add to publication my_pub';
    return;
  end if;

  execute format('alter publication my_pub add tables in schema %s;',
                 Schemas_To_Add);
end;$$;

--Then you can see what was generated and go to the subscriber side to
--refresh the subscription and measure the time spent synchronizing.
-- Schemas whose name contains test_<digits>.
select *
from pg_namespace
where nspname ~ 'test_\d+';

-- Schemas attached to publications, with the schema shown by name.
select pnnspid::regnamespace, *
from pg_publication_namespace;

-- Ordinary tables living in schemas whose name contains test_<digits>.
select oid::regclass, *
from pg_class
where relkind = 'r'
  and relnamespace::regnamespace::text ~ 'test_\d+';

--Afterwards, clean up what the test created.
--IF EXISTS keeps the cleanup re-runnable when the publication is gone.
drop publication if exists my_pub;

-- Drops every test_NNNNN schema together with all objects inside it.
do $$
declare
  Schema_Drop text;
begin
  for Schema_Drop in
    select n.nspname
    from pg_namespace as n
    -- Anchored on purpose: an unanchored 'test_\d+' would also match
    -- unrelated schemas (e.g. contest_2024) and drop them with CASCADE.
    where n.nspname ~ '^test_\d+$'
  loop
    -- %I quotes the identifier when necessary.
    execute format('drop schema %I cascade;', Schema_Drop);
  end loop;
end;$$;

regards
Marcos

--000000000000ac04f606455d6a6a
Content-Type: text/html; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable

<div dir=3D"ltr"><div dir=3D"ltr"><div><span style=3D"color:rgb(102,102,102=
);font-family:verdana,sans-serif">You can see how much time a subscriber wi=
ll need to get all files which were added with this.</span></div></div><div=
 class=3D"gmail_quote gmail_quote_container"><div class=3D"gmail_default" s=
tyle=3D"font-family:verdana,sans-serif;color:rgb(102,102,102)">Run first ti=
me and will create 10 thousand tables, publish them and measure how much ti=
me to get all tables Ready on pg_subscription_rel.</div><div class=3D"gmail=
_default" style=3D"font-family:verdana,sans-serif;color:rgb(102,102,102)">R=
un again to add more 10 thousand=C2=A0tables and see that time will increas=
e, more tables and more time.</div><div class=3D"gmail_default" style=3D"fo=
nt-family:verdana,sans-serif;color:rgb(102,102,102)"><br></div><div class=
=3D"gmail_default" style=3D"font-family:verdana,sans-serif;color:rgb(102,10=
2,102)">This is just to show that if you create a subscription with a high =
number of tables it spends more time doing select on pg_get_publication_tab=
les than the time spent actually copying. My use case I have 50 thousand ta=
bles, and it takes 5 seconds every time it needs to get next table to copy.=
</div><div class=3D"gmail_default" style=3D"font-family:verdana,sans-serif;=
color:rgb(102,102,102)"><br></div><div class=3D"gmail_default" style=3D"fon=
t-family:verdana,sans-serif;color:rgb(102,102,102)">--Create a empty=C2=A0p=
ublication</div><div class=3D"gmail_default" style=3D"font-family:verdana,s=
ans-serif;color:rgb(102,102,102)">create publication my_pub;<br><br>--Run t=
hese 3 following anonymous blocks to create schemas, tables and add them to=
 publication.</div><div class=3D"gmail_default" style=3D"font-family:verdan=
a,sans-serif;color:rgb(102,102,102)">--Need to have 3 blocks because I cann=
ot create a table in a schema that is not committed. And the same for a pub=
lication.<br>do $$ declare Schemas_Add integer =3D 100; Actual_Schema text;=
 begin<br>=C2=A0 for Actual_Schema in select &#39;test_&#39;||(select to_ch=
ar(coalesce(max(substring(nspname,&#39;test_(\d+)&#39;)::integer),0)+g,&#39=
;FM00000&#39;)<br>=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=
=A0 =C2=A0from pg_namespace where nspname ~ &#39;test_\d+&#39;) from genera=
te_series(1,Schemas_Add) g loop<br>=C2=A0 =C2=A0 execute format(&#39;create=
 schema %s&#39;,Actual_Schema);<br>=C2=A0 end loop;<br>end;$$;<br><br>do $$=
 declare Tables_Add integer =3D 100; Actual_Schema text; begin<br>=C2=A0 fo=
r Actual_Schema in select nspname from pg_namespace where nspname =C2=A0~ &=
#39;test_\d+&#39; and<br>=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =
=C2=A0 =C2=A0 =C2=A0 =C2=A0 not exists(select from pg_class where relnamesp=
ace =3D pg_namespace.oid) loop<br>=C2=A0 =C2=A0 for j in 1..Tables_Add loop=
<br>=C2=A0 =C2=A0 =C2=A0 execute format(&#39;create table %s.test_%s as sel=
ect generate_series(1,random(0,10))::integer id;&#39;,<br>=C2=A0 =C2=A0 =C2=
=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0Actual_Schema,to=
_char(j,&#39;FM00000&#39;));<br>=C2=A0 =C2=A0 end loop;<br>=C2=A0 end loop;=
<br>end;$$;<br><br>do $$ declare Schemas_To_Add text =3D (select string_agg=
(nspname,&#39;,&#39;) from pg_namespace n where nspname ~ &#39;test_\d+&#39=
; and<br>=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=
=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 not exists(select from pg_publicatio=
n_namespace where pnnspid =3D n.oid)); begin<br>=C2=A0 execute format(&#39;=
alter publication my_pub add tables in schema %s;&#39;,Schemas_To_Add);<br>=
end;$$;<br><br>--Then you can see what was generated and go to the subscrib=
er side to refresh the subscription and measure time spent to synchronize.<=
br>select * from pg_Namespace where nspname ~ &#39;test_\d+&#39;;<br>select=
 pnnspid::regnamespace, * from pg_publication_namespace;<br>select oid::reg=
class, * from pg_Class where relnamespace::regnamespace::text ~ &#39;test_\=
d+&#39; and relkind =3D &#39;r&#39;;<br><br>--Later just clean what you do.=
<br>drop publication my_pub;<br><br>do $$ declare Schema_Drop text; begin<b=
r>=C2=A0 for Schema_Drop in select nspname from pg_Namespace where nspname =
~ &#39;test_\d+&#39; loop<br>=C2=A0 =C2=A0 execute format (&#39;drop schema=
 %s cascade;&#39;,Schema_Drop);<br>=C2=A0 end loop;<br>end;$$;</div><div cl=
ass=3D"gmail_default" style=3D"font-family:verdana,sans-serif;color:rgb(102=
,102,102)"><br></div><div class=3D"gmail_default" style=3D"font-family:verd=
ana,sans-serif;color:rgb(102,102,102)">regards</div><div class=3D"gmail_def=
ault" style=3D"font-family:verdana,sans-serif;color:rgb(102,102,102)">Marco=
s<br><br><br></div></div></div>

--000000000000ac04f606455d6a6a--