Received: from malur.postgresql.org ([217.196.149.56]) by arkaria.postgresql.org with esmtps (TLS1.3) tls TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 (Exim 4.94.2) (envelope-from ) id 1srDOs-00G8lc-68 for pgsql-general@arkaria.postgresql.org; Thu, 19 Sep 2024 09:24:34 +0000 Received: from localhost ([127.0.0.1] helo=malur.postgresql.org) by malur.postgresql.org with esmtp (Exim 4.94.2) (envelope-from ) id 1srDOq-007tmv-Jh for pgsql-general@arkaria.postgresql.org; Thu, 19 Sep 2024 09:24:32 +0000 Received: from magus.postgresql.org ([2a02:c0:301:0:ffff::29]) by malur.postgresql.org with esmtps (TLS1.3) tls TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 (Exim 4.94.2) (envelope-from ) id 1srDOq-007tmn-5i for pgsql-general@lists.postgresql.org; Thu, 19 Sep 2024 09:24:32 +0000 Received: from mail-yw1-x1135.google.com ([2607:f8b0:4864:20::1135]) by magus.postgresql.org with esmtps (TLS1.3) tls TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 (Exim 4.94.2) (envelope-from ) id 1srDOo-001yPv-1A for pgsql-general@lists.postgresql.org; Thu, 19 Sep 2024 09:24:31 +0000 Received: by mail-yw1-x1135.google.com with SMTP id 00721157ae682-6b5b65b1b9fso4537947b3.2 for ; Thu, 19 Sep 2024 02:24:29 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1726737868; x=1727342668; darn=lists.postgresql.org; h=cc:to:subject:message-id:date:from:in-reply-to:references :mime-version:from:to:cc:subject:date:message-id:reply-to; bh=8FWYLxhj0f6S60U4qT/SJvo9gLyEtatvaKYAAdZuqaA=; b=YFoUGnOpoVVFANfgd0lJQR7kDyIbk6Im4SwmFFss7GAodD6aDOb87of/1CazJk3big xYILxQloS5BtL7sI2LDib/D4Q1Vb1HZcUN1CQ/j64Kjbriqkw1WNcWgM4YxEivGHfJ2n tvP6OLTRK9y5kyabbZjHUG2KVH2diZPY4UvxS0InO0W92BeaPWN6TqUVHmN1KyUVEV52 oSAPV9cVtD8kqQyJP/lbUQJnrjo1umA6wqFz0p+w4BYY/O0dZ4TjjVhjPqw8IX27hIkE IpqlUXjGMxwiGFFvdogd5kF6Q3hB36Y5KukYWzzxwAs7RHDVbUpLtoGQEgWzulvlslbZ kljg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1726737868; x=1727342668; h=cc:to:subject:message-id:date:from:in-reply-to:references :mime-version:x-gm-message-state:from:to:cc:subject:date:message-id :reply-to; bh=8FWYLxhj0f6S60U4qT/SJvo9gLyEtatvaKYAAdZuqaA=; b=jQvbDSkDgiam35251kKfSuam1wSuO9sHcoULF7DB0Z88/RqFYR2U9L6x5mT296E5C6 Vwy0ZIxixfcJI7HfZz9Pkj7Bh8K8n2KgRywE2lI0zDcHf6IJsc04aHneXHnC1T8UVeaF fJWR8fMIinPFNpZYcfTPS+VaG+ba8OVpRe4LUSHhZyY4DqUVSgxhiaLdKFKnxTTFZE2S mEituOwrAXJP9uvqsZcLHT6m7iV8c/bGUfhTCoA9K6VC49ZQ8jrY68DwonKKdVZ/utjJ mHSL9nS23kn7xYQfz9IuWDR8gZ1/pJztLjNA7QAoyjQyBctbK6/1l4+G5HZNL4RJTpk7 6rYQ== X-Gm-Message-State: AOJu0Yy8omARGXb7tm5ti0It7+KkKilEVKo4UVaRCAWP2c8NOm3/N2BL likqiFFGtp8xW37gykT1E5mpQVSskRxPkw47r21rBoXZPEtRknqcFzQ02JG5drUfo1wywMPXACt lglsSVI055UBaMeI8ty0/DCeyviPBsgeK X-Google-Smtp-Source: AGHT+IFbwbnmIiP9YR4tL1hyD8p1PYnmMvMBHzgleUtWXXBHNBhgigw5HBZdzne1kI6IkKBTHvH1/oL6LG8OQvprZYc= X-Received: by 2002:a05:690c:6083:b0:6dd:bcce:7cd7 with SMTP id 00721157ae682-6ddbcce7fa7mr137404727b3.42.1726737868086; Thu, 19 Sep 2024 02:24:28 -0700 (PDT) MIME-Version: 1.0 References: In-Reply-To: From: Lok P Date: Thu, 19 Sep 2024 14:54:16 +0530 Message-ID: Subject: Re: How batch processing works To: Ron Johnson Cc: pgsql-general Content-Type: multipart/alternative; boundary="0000000000003f68920622757d2b" List-Id: List-Help: List-Subscribe: List-Post: List-Owner: List-Archive: Archived-At: Precedence: bulk --0000000000003f68920622757d2b Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable On Thu, Sep 19, 2024 at 11:31=E2=80=AFAM Ron Johnson wrote: > > [snip] > >> >> Method-4 >> >> INSERT INTO parent_table VALUES (1, 'a'), (2, 'a'); >> INSERT INTO child_table VALUES (1,1, 'a'), (1,2, 'a'); >> commit; >> > > If I knew that I had to load a structured input data file (even if it had > parent and child records), this is how I'd do it (but probably first try > and see if "in-memory COPY INTO" is such a thing). > > I was trying to reproduce this behaviour using row by row commit vs just batch commit vs true batch insert as you mentioned, i am not able to see any difference between "batch commit" and "true batch insert" response. Am I missing anything? CREATE TABLE debug_log ( method1 TEXT, start_time TIMESTAMP, end_time TIMESTAMP, elapsed_time INTERVAL ); CREATE TABLE parent_table ( id SERIAL PRIMARY KEY, name TEXT ); CREATE TABLE parent_table2 ( id SERIAL PRIMARY KEY, name TEXT ); CREATE TABLE parent_table3 ( id SERIAL PRIMARY KEY, name TEXT ); DO $$ DECLARE num_inserts INTEGER :=3D 100000; batch_size INTEGER :=3D 50; start_time TIMESTAMP; end_time TIMESTAMP; elapsed_time INTERVAL; i INTEGER; BEGIN -- Method 1: Individual Inserts with Commit after every Row start_time :=3D clock_timestamp(); FOR i IN 1..num_inserts LOOP INSERT INTO parent_table VALUES (i, 'a'); COMMIT; END LOOP; end_time :=3D clock_timestamp(); elapsed_time :=3D end_time - start_time; INSERT INTO debug_log (method1, start_time, end_time, elapsed_time) VALUES ('Method 1: Individual Inserts with Commit after every Row', start_time, end_time, elapsed_time); -- Method 2: Individual Inserts with Commit after 100 Rows start_time :=3D clock_timestamp(); FOR i IN 1..num_inserts LOOP INSERT INTO parent_table2 VALUES (i, 'a'); -- Commit after every 100 rows IF i % batch_size =3D 0 THEN COMMIT; END IF; END LOOP; -- Final commit if not already committed commit; end_time :=3D clock_timestamp(); elapsed_time :=3D end_time - start_time; INSERT INTO debug_log (method1, start_time, end_time, elapsed_time) VALUES ('Method 2: Individual Inserts with Commit after 100 Rows', start_time, end_time, elapsed_time); -- Method 3: Batch Inserts with Commit after all start_time :=3D clock_timestamp(); FOR i IN 1..(num_inserts / batch_size) LOOP INSERT INTO parent_table3 VALUES (1 + (i - 1) * batch_size, 'a'), (2 + (i - 1) * batch_size, 'a'), (3 + (i - 1) * batch_size, 'a'), (4 + (i - 1) * batch_size, 'a'), (5 + (i - 1) * batch_size, 'a'), (6 + (i - 1) * batch_size, 'a'), (7 + (i - 1) * batch_size, 'a'), (8 + (i - 1) * batch_size, 'a'), (9 + (i - 1) * batch_size, 'a'), (10 + (i - 1) * batch_size, 'a'), (11 + (i - 1) * batch_size, 'a'), (12 + (i - 1) * batch_size, 'a'), (13 + (i - 1) * batch_size, 'a'), (14 + (i - 1) * batch_size, 'a'), (15 + (i - 1) * batch_size, 'a'), (16 + (i - 1) * batch_size, 'a'), (17 + (i - 1) * batch_size, 'a'), (18 + (i - 1) * batch_size, 'a'), (19 + (i - 1) * batch_size, 'a'), (20 + (i - 1) * batch_size, 'a'), (21 + (i - 1) * batch_size, 'a'), (22 + (i - 1) * batch_size, 'a'), (23 + (i - 1) * batch_size, 'a'), (24 + (i - 1) * batch_size, 'a'), (25 + (i - 1) * batch_size, 'a'), (26 + (i - 1) * batch_size, 'a'), (27 + (i - 1) * batch_size, 'a'), (28 + (i - 1) * batch_size, 'a'), (29 + (i - 1) * batch_size, 'a'), (30 + (i - 1) * batch_size, 'a'), (31 + (i - 1) * batch_size, 'a'), (32 + (i - 1) * batch_size, 'a'), (33 + (i - 1) * batch_size, 'a'), (34 + (i - 1) * batch_size, 'a'), (35 + (i - 1) * batch_size, 'a'), (36 + (i - 1) * batch_size, 'a'), (37 + (i - 1) * batch_size, 'a'), (38 + (i - 1) * batch_size, 'a'), (39 + (i - 1) * batch_size, 'a'), (40 + (i - 1) * batch_size, 'a'), (41 + (i - 1) * batch_size, 'a'), (42 + (i - 1) * batch_size, 'a'), (43 + (i - 1) * batch_size, 'a'), (44 + (i - 1) * batch_size, 'a'), (45 + (i - 1) * batch_size, 'a'), (46 + (i - 1) * batch_size, 'a'), (47 + (i - 1) * batch_size, 'a'), (48 + (i - 1) * batch_size, 'a'), (49 + (i - 1) * batch_size, 'a'), (50 + (i - 1) * batch_size, 'a')); COMMIT; END LOOP; COMMIT; -- Final commit for all end_time :=3D clock_timestamp(); elapsed_time :=3D end_time - start_time; INSERT INTO debug_log (method1, start_time, end_time, elapsed_time) VALUES ('Method 3: Batch Inserts with Commit after All', start_time, end_time, elapsed_time); END $$; --0000000000003f68920622757d2b Content-Type: text/html; charset="UTF-8" Content-Transfer-Encoding: quoted-printable


=
On Thu, Sep 19, 2024 at 11:31=E2=80= =AFAM Ron Johnson <ronljohnso= njr@gmail.com> wrote:

[snip]=C2=A0

Method-4

INSERT INTO parent_table V= ALUES =C2=A0(1, 'a'), (2, 'a');
INSERT INTO child_table = VALUES =C2=A0 (1,1, 'a'), (1,2, 'a');
commit;

If I knew that I had to load a stru= ctured input data file (even if it had parent=C2=A0and child records), this= is how I'd do it (but probably first try and see if "in-memory CO= PY INTO" is such a thing).



I= was trying to reproduce this behaviour using row by row commit vs just b= atch commit vs true batch insert as you mentioned, i am not able to see an= y difference between "batch commit" and "true batch insert&q= uot; response. Am I missing anything?

CREATE TABLE debug_log (
=C2=A0 =C2=A0 method1 TEXT,
=C2= =A0 =C2=A0 start_time TIMESTAMP,
=C2=A0 =C2=A0 end_time TIMESTAMP,
= =C2=A0 =C2=A0 elapsed_time INTERVAL
);

CREATE TABLE parent_table = (
=C2=A0 =C2=A0 id SERIAL PRIMARY KEY,
=C2=A0 =C2=A0 name TEXT
);<= br>

CREATE TABLE parent_table2 (
=C2=A0 =C2=A0 id SERIAL PRIMARY = KEY,
=C2=A0 =C2=A0 name TEXT
);


CREATE TABLE parent_table3= (
=C2=A0 =C2=A0 id SERIAL PRIMARY KEY,
=C2=A0 =C2=A0 name TEXT
);=
DO $$
DECLARE
=C2=A0 =C2=A0 num_inserts INTEGER :=3D 100000;
= =C2=A0 =C2=A0 batch_size INTEGER :=3D 50;
=C2=A0 =C2=A0 start_time TIMES= TAMP;
=C2=A0 =C2=A0 end_time TIMESTAMP;
=C2=A0 =C2=A0 elapsed_time IN= TERVAL;
=C2=A0 =C2=A0 i INTEGER;
BEGIN
=C2=A0 =C2=A0 -- Method 1: = Individual Inserts with Commit after every Row
=C2=A0 =C2=A0 start_time = :=3D clock_timestamp();

=C2=A0 =C2=A0 FOR i IN 1..num_inserts LOOP=C2=A0 =C2=A0 =C2=A0 =C2=A0 INSERT INTO parent_table VALUES (i, 'a= 9;);
=C2=A0 =C2=A0 =C2=A0 =C2=A0 COMMIT; =C2=A0
=C2=A0 =C2=A0 END LOO= P;

=C2=A0 =C2=A0 end_time :=3D clock_timestamp();
=C2=A0 =C2=A0 e= lapsed_time :=3D end_time - start_time;
=C2=A0 =C2=A0 INSERT INTO debug_= log (method1, start_time, end_time, elapsed_time)
=C2=A0 =C2=A0 VALUES = ('Method 1: Individual Inserts with Commit after every Row', start_= time, end_time, elapsed_time);

=C2=A0 =C2=A0 -- Method 2: Individual= Inserts with Commit after 100 Rows
=C2=A0 =C2=A0 start_time :=3D clock_= timestamp();

=C2=A0 =C2=A0 FOR i IN 1..num_inserts LOOP
=C2=A0 = =C2=A0 =C2=A0 =C2=A0 INSERT INTO parent_table2 VALUES (i, 'a');
= =C2=A0 =C2=A0 =C2=A0 =C2=A0 -- Commit after every 100 rows
=C2=A0 =C2=A0= =C2=A0 =C2=A0 IF i % batch_size =3D 0 THEN
=C2=A0 =C2=A0 =C2=A0 =C2=A0 = =C2=A0 =C2=A0 COMMIT; =C2=A0
=C2=A0 =C2=A0 =C2=A0 =C2=A0 END IF;
=C2= =A0 =C2=A0 END LOOP;

=C2=A0 =C2=A0 -- Final commit if not already co= mmitted
=C2=A0 =C2=A0commit;

=C2=A0 =C2=A0 end_time :=3D clock_ti= mestamp();
=C2=A0 =C2=A0 elapsed_time :=3D end_time - start_time;
=C2= =A0 =C2=A0 INSERT INTO debug_log (method1, start_time, end_time, elapsed_ti= me)
=C2=A0 =C2=A0 VALUES ('Method 2: Individual Inserts with Commit= after 100 Rows', start_time, end_time, elapsed_time);

=C2=A0 = =C2=A0 -- Method 3: Batch Inserts with Commit after all
=C2=A0 =C2=A0 st= art_time :=3D clock_timestamp();

=C2=A0 =C2=A0 FOR i IN 1..(num_inse= rts / batch_size) LOOP
=C2=A0 =C2=A0 =C2=A0 =C2=A0 INSERT INTO parent_ta= ble3 VALUES
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (1 + (i - 1) * ba= tch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (2 + (= i - 1) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 = =C2=A0 (3 + (i - 1) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 (4 + (i - 1) * batch_size, 'a'),
=C2=A0 =C2=A0= =C2=A0 =C2=A0 =C2=A0 =C2=A0 (5 + (i - 1) * batch_size, 'a'),
= =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (6 + (i - 1) * batch_size, 'a= '),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (7 + (i - 1) * batch_s= ize, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (8 + (i - 1= ) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 = (9 + (i - 1) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A0 (10 + (i - 1) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 =C2=A0 (11 + (i - 1) * batch_size, 'a'),
=C2= =A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (12 + (i - 1) * batch_size, 'a&#= 39;),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (13 + (i - 1) * batch_si= ze, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (14 + (i - 1= ) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 = (15 + (i - 1) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 = =C2=A0 =C2=A0 (16 + (i - 1) * batch_size, 'a'),
=C2=A0 =C2=A0 = =C2=A0 =C2=A0 =C2=A0 =C2=A0 (17 + (i - 1) * batch_size, 'a'),
= =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (18 + (i - 1) * batch_size, '= a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (19 + (i - 1) * batch= _size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (20 + (i = - 1) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2= =A0 (21 + (i - 1) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 (22 + (i - 1) * batch_size, 'a'),
=C2=A0 =C2= =A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (23 + (i - 1) * batch_size, 'a'),=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (24 + (i - 1) * batch_size, = 9;a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (25 + (i - 1) * bat= ch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (26 + (= i - 1) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 = =C2=A0 (27 + (i - 1) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 = =C2=A0 =C2=A0 =C2=A0 (28 + (i - 1) * batch_size, 'a'),
=C2=A0 = =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (29 + (i - 1) * batch_size, 'a')= ,
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (30 + (i - 1) * batch_size, = 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (31 + (i - 1) * = batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (32 = + (i - 1) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= =C2=A0 (33 + (i - 1) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 = =C2=A0 =C2=A0 =C2=A0 (34 + (i - 1) * batch_size, 'a'),
=C2=A0 = =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (35 + (i - 1) * batch_size, 'a')= ,
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (36 + (i - 1) * batch_size, = 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (37 + (i - 1) * = batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (38 = + (i - 1) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= =C2=A0 (39 + (i - 1) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 = =C2=A0 =C2=A0 =C2=A0 (40 + (i - 1) * batch_size, 'a'),
=C2=A0 = =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (41 + (i - 1) * batch_size, 'a')= ,
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (42 + (i - 1) * batch_size, = 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (43 + (i - 1) * = batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (44 = + (i - 1) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= =C2=A0 (45 + (i - 1) * batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 = =C2=A0 =C2=A0 =C2=A0 (46 + (i - 1) * batch_size, 'a'),
=C2=A0 = =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (47 + (i - 1) * batch_size, 'a')= ,
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (48 + (i - 1) * batch_size, = 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (49 + (i - 1) * = batch_size, 'a'),
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (50 = + (i - 1) * batch_size, 'a'));
COMMIT;
=C2=A0 =C2=A0 END L= OOP;

=C2=A0 =C2=A0 COMMIT; =C2=A0-- Final commit for all
=C2=A0 = =C2=A0 end_time :=3D clock_timestamp();
=C2=A0 =C2=A0 elapsed_time :=3D = end_time - start_time;
=C2=A0 =C2=A0 INSERT INTO debug_log (method1, sta= rt_time, end_time, elapsed_time)
=C2=A0 =C2=A0 VALUES ('Method 3: B= atch Inserts with Commit after All', start_time, end_time, elapsed_time= );

END $$;


=C2=A0
--0000000000003f68920622757d2b--