public inbox for [email protected]  
help / color / mirror / Atom feed
Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
29+ messages / 9 participants
[nested] [flat]

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-02-03 06:22  jian he <[email protected]>
  0 siblings, 1 reply; 29+ messages in thread

From: jian he @ 2024-02-03 06:22 UTC (permalink / raw)
  To: Yugo NAGATA <[email protected]>; +Cc: David G. Johnston <[email protected]>; PostgreSQL Hackers <[email protected]>

The idea of on_error is to tolerate errors, I think.
if a column has a not null constraint, let it cannot be used with
(on_error 'null')

Based on this, I've made a patch.
based on COPY Synopsis: ON_ERROR 'error_action'
on_error 'null', the  keyword NULL should be single quoted.

demo:
COPY check_ign_err FROM STDIN WITH (on_error 'null');
1 {1} a
2 {2} 1
3 {3} 2
4 {4} b
a {5} c
\.

\pset null NULL

SELECT * FROM check_ign_err;
  n   |  m  |  k
------+-----+------
    1 | {1} | NULL
    2 | {2} |    1
    3 | {3} |    2
    4 | {4} | NULL
 NULL | {5} | NULL


Attachments:

  [text/x-patch] v1-0001-introduce-copy-on_error-null-option.patch (8.8K, 2-v1-0001-introduce-copy-on_error-null-option.patch)
  download | inline diff:
From 19afa942af22fd3d2ed2436c6bc7ce02f00bb570 Mon Sep 17 00:00:00 2001
From: jian he <[email protected]>
Date: Sat, 3 Feb 2024 14:04:08 +0800
Subject: [PATCH v1 1/1] introduce copy on_error 'null' option

on_error 'null', null needs single quoted.
any data type conversion error will treat that column value to be NULL.
it will not work with column have not null constraint,
we check this at BeginCopyFrom.

discussion: https://www.postgresql.org/message-id/CAKFQuwawy1e6YR4S=j+y7pXqg_Dw1WBVrgvf=BP3d1_aSfe_+Q@mail.gmail.com
---
 doc/src/sgml/ref/copy.sgml           |  1 +
 src/backend/commands/copy.c          |  2 ++
 src/backend/commands/copyfrom.c      | 28 ++++++++++++++++++++++++----
 src/backend/commands/copyfromparse.c | 27 +++++++++++++++++++++++++--
 src/include/commands/copy.h          |  1 +
 src/test/regress/expected/copy2.out  | 26 ++++++++++++++++++++++++++
 src/test/regress/sql/copy2.sql       | 28 ++++++++++++++++++++++++++++
 7 files changed, 107 insertions(+), 6 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 55764fc1..d3c4ebdc 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -390,6 +390,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       An <replaceable class="parameter">error_action</replaceable> value of
       <literal>stop</literal> means fail the command, while
       <literal>ignore</literal> means discard the input row and continue with the next one.
+      <literal>null</literal> means the input value will be <literal>null</literal> and continue with the next one.
       The default is <literal>stop</literal>.
      </para>
      <para>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index cc0786c6..01ce47b0 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -422,6 +422,8 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
 		return COPY_ON_ERROR_STOP;
 	if (pg_strcasecmp(sval, "ignore") == 0)
 		return COPY_ON_ERROR_IGNORE;
+	if (pg_strcasecmp(sval, "null") == 0)
+		return COPY_ON_ERROR_NULL;
 
 	ereport(ERROR,
 			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 1fe70b91..f4c5704e 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1005,6 +1005,7 @@ CopyFrom(CopyFromState cstate)
 			 * information according to ON_ERROR.
 			 */
 			if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
+			{
 
 				/*
 				 * Just make ErrorSaveContext ready for the next NextCopyFrom.
@@ -1013,11 +1014,18 @@ CopyFrom(CopyFromState cstate)
 				 */
 				cstate->escontext->error_occurred = false;
 
-			/* Report that this tuple was skipped by the ON_ERROR clause */
-			pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED,
-										 ++skipped);
+				/* Report that this tuple was skipped by the ON_ERROR clause */
+				pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED,
+											++skipped);
 
-			continue;
+				continue;
+			}
+			/*
+			 * Just make ErrorSaveContext ready for the next NextCopyFrom.
+			 *
+			*/
+			if (cstate->opts.on_error == COPY_ON_ERROR_NULL)
+				cstate->escontext->error_occurred = false;
 		}
 
 		ExecStoreVirtualTuple(myslot);
@@ -1313,6 +1321,7 @@ CopyFrom(CopyFromState cstate)
 	error_context_stack = errcallback.previous;
 
 	if (cstate->opts.on_error != COPY_ON_ERROR_STOP &&
+		cstate->opts.on_error != COPY_ON_ERROR_NULL &&
 		cstate->num_errors > 0)
 		ereport(NOTICE,
 				errmsg_plural("%llu row was skipped due to data type incompatibility",
@@ -1468,6 +1477,8 @@ BeginCopyFrom(ParseState *pstate,
 		 */
 		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
 			cstate->escontext->details_wanted = false;
+		else if (cstate->opts.on_error == COPY_ON_ERROR_NULL)
+			cstate->escontext->details_wanted = false;
 	}
 	else
 		cstate->escontext = NULL;
@@ -1621,6 +1632,15 @@ BeginCopyFrom(ParseState *pstate,
 		if (att->attisdropped)
 			continue;
 
+		/*
+		 * we can specify on_error 'null', but it can only apply to columns
+		 * don't have not null constraint.
+		*/
+		if (att->attnotnull && cstate->opts.on_error == COPY_ON_ERROR_NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+					 errmsg("copy on_error 'null' cannot be used with not null constraint column")));
+
 		/* Fetch the input function and typioparam info */
 		if (cstate->opts.binary)
 			getTypeBinaryInputInfo(att->atttypid,
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 7cacd0b7..9475a9dc 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -881,6 +881,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 		int			fldct;
 		int			fieldno;
 		char	   *string;
+		bool		error_happened = false;
 
 		/* read raw fields in the next line */
 		if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
@@ -968,14 +969,36 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 											(Node *) cstate->escontext,
 											&values[m]))
 			{
-				cstate->num_errors++;
-				return true;
+				if (cstate->opts.on_error == COPY_ON_ERROR_NULL)
+				{
+
+					values[m] = (Datum) 0;
+					nulls[m] = true;
+					/* here, we need set error_occurred to false, so COPY will be continue */
+					cstate->escontext->error_occurred = false;
+
+					if (!error_happened)
+						error_happened = true;
+				}
+				else
+				{
+					cstate->num_errors++;
+					return true;
+				}
+
 			}
 
 			cstate->cur_attname = NULL;
 			cstate->cur_attval = NULL;
 		}
 
+		/* accumate num_errors. one row multiple errors only count 1*/
+		if (error_happened)
+		{
+			cstate->num_errors++;
+			cstate->escontext->error_occurred = true;
+		}
+
 		Assert(fieldno == attr_count);
 	}
 	else
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index b3da3cb0..6c3733fa 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -38,6 +38,7 @@ typedef enum CopyOnErrorChoice
 {
 	COPY_ON_ERROR_STOP = 0,		/* immediately throw errors, default */
 	COPY_ON_ERROR_IGNORE,		/* ignore errors */
+	COPY_ON_ERROR_NULL,			/* transform error field to null */
 } CopyOnErrorChoice;
 
 /*
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 25c401ce..0038d6c8 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -751,6 +751,31 @@ CONTEXT:  COPY check_ign_err, line 1: "1	{1}"
 COPY check_ign_err FROM STDIN WITH (on_error ignore);
 ERROR:  extra data after last expected column
 CONTEXT:  COPY check_ign_err, line 1: "1	{1}	3	abc"
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error 'null');
+\pset null NULL
+SELECT * FROM check_ign_err;
+  n   |  m  |  k   
+------+-----+------
+    1 | {1} | NULL
+    2 | {2} |    1
+    3 | {3} |    2
+    4 | {4} | NULL
+ NULL | {5} | NULL
+(5 rows)
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error 'null');
+ERROR:  missing data for column "k"
+CONTEXT:  COPY check_ign_err, line 1: "1,"
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error 'null');
+ERROR:  extra data after last expected column
+CONTEXT:  COPY check_ign_err, line 1: "1,{1},1,1"
+CREATE temp TABLE check_on_err_null (n int, m int not null, k int);
+--should fail. since not null constraint conflict with on_error null
+COPY check_on_err_null FROM STDIN WITH (on_error 'null');
+ERROR:  copy on_error 'null' cannot be used with not null constraint column
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
@@ -767,6 +792,7 @@ DROP VIEW instead_of_insert_tbl_view_2;
 DROP FUNCTION fun_instead_of_insert_tbl();
 DROP TABLE check_ign_err;
 DROP TABLE hard_err;
+DROP TABLE check_on_err_null;
 --
 -- COPY FROM ... DEFAULT
 --
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index b5e549e8..dacde209 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -534,6 +534,33 @@ COPY check_ign_err FROM STDIN WITH (on_error ignore);
 1	{1}	3	abc
 \.
 
+
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error 'null');
+1	{1}	a
+2	{2}	1
+3	{3}	2
+4	{4}	b
+a	{5}	c
+\.
+
+\pset null NULL
+SELECT * FROM check_ign_err;
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error 'null');
+1,
+\.
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error 'null');
+1,{1},1,1
+\.
+
+CREATE temp TABLE check_on_err_null (n int, m int not null, k int);
+--should fail. since not null constraint conflict with on_error null
+COPY check_on_err_null FROM STDIN WITH (on_error 'null');
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
@@ -550,6 +577,7 @@ DROP VIEW instead_of_insert_tbl_view_2;
 DROP FUNCTION fun_instead_of_insert_tbl();
 DROP TABLE check_ign_err;
 DROP TABLE hard_err;
+DROP TABLE check_on_err_null;
 
 --
 -- COPY FROM ... DEFAULT
-- 
2.34.1



^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-02-05 02:28  torikoshia <[email protected]>
  parent: jian he <[email protected]>
  0 siblings, 2 replies; 29+ messages in thread

From: torikoshia @ 2024-02-05 02:28 UTC (permalink / raw)
  To: jian he <[email protected]>; +Cc: Yugo NAGATA <[email protected]>; David G. Johnston <[email protected]>; PostgreSQL Hackers <[email protected]>

Hi,

On 2024-02-03 15:22, jian he wrote:
> The idea of on_error is to tolerate errors, I think.
> if a column has a not null constraint, let it cannot be used with
> (on_error 'null')

> +       /*
> +        * we can specify on_error 'null', but it can only apply to 
> columns
> +        * don't have not null constraint.
> +       */
> +       if (att->attnotnull && cstate->opts.on_error == 
> COPY_ON_ERROR_NULL)
> +           ereport(ERROR,
> +                   (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
> +                    errmsg("copy on_error 'null' cannot be used with 
> not null constraint column")));

This means we cannot use ON_ERROR 'null' even when there is one column 
which have NOT NULL constraint, i.e. primary key, right?
IMHO this is strong constraint and will decrease the opportunity to use 
this feature.

It might be better to allow error_action 'null' for tables which have 
NOT NULL constraint columns, and when facing soft errors for those rows, 
skip that row or stop COPY.

> Based on this, I've made a patch.
> based on COPY Synopsis: ON_ERROR 'error_action'
> on_error 'null', the  keyword NULL should be single quoted.

As you mentioned, single quotation seems a little odd..

I'm not sure what is the best name and syntax for this feature, but 
since current error_action are verbs('stop' and 'ignore'), I feel 'null' 
might not be appropriate.

> demo:
> COPY check_ign_err FROM STDIN WITH (on_error 'null');
> 1 {1} a
> 2 {2} 1
> 3 {3} 2
> 4 {4} b
> a {5} c
> \.
> 
> \pset null NULL
> 
> SELECT * FROM check_ign_err;
>   n   |  m  |  k
> ------+-----+------
>     1 | {1} | NULL
>     2 | {2} |    1
>     3 | {3} |    2
>     4 | {4} | NULL
>  NULL | {5} | NULL

Since we notice the number of ignored rows when ON_ERROR is 'ignore', 
users may want to know the number of rows which was changed to NULL when 
using ON_ERROR 'null'.

-- 
Regards,

--
Atsushi Torikoshi
NTT DATA Group Corporation






^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-02-05 06:26  jian he <[email protected]>
  parent: torikoshia <[email protected]>
  1 sibling, 1 reply; 29+ messages in thread

From: jian he @ 2024-02-05 06:26 UTC (permalink / raw)
  To: torikoshia <[email protected]>; +Cc: Yugo NAGATA <[email protected]>; David G. Johnston <[email protected]>; PostgreSQL Hackers <[email protected]>

On Mon, Feb 5, 2024 at 10:29 AM torikoshia <[email protected]> wrote:
>
> Hi,
>
> On 2024-02-03 15:22, jian he wrote:
> > The idea of on_error is to tolerate errors, I think.
> > if a column has a not null constraint, let it cannot be used with
> > (on_error 'null')
>
> > +       /*
> > +        * we can specify on_error 'null', but it can only apply to
> > columns
> > +        * don't have not null constraint.
> > +       */
> > +       if (att->attnotnull && cstate->opts.on_error ==
> > COPY_ON_ERROR_NULL)
> > +           ereport(ERROR,
> > +                   (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
> > +                    errmsg("copy on_error 'null' cannot be used with
> > not null constraint column")));
>
> This means we cannot use ON_ERROR 'null' even when there is one column
> which have NOT NULL constraint, i.e. primary key, right?
> IMHO this is strong constraint and will decrease the opportunity to use
> this feature.

I don't want to fail in the middle of bulk inserts,
so I thought immediately erroring out would be a great idea.
Let's see what other people think.






^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-02-05 08:22  Yugo NAGATA <[email protected]>
  parent: torikoshia <[email protected]>
  1 sibling, 1 reply; 29+ messages in thread

From: Yugo NAGATA @ 2024-02-05 08:22 UTC (permalink / raw)
  To: torikoshia <[email protected]>; +Cc: jian he <[email protected]>; David G. Johnston <[email protected]>; PostgreSQL Hackers <[email protected]>

On Mon, 05 Feb 2024 11:28:59 +0900
torikoshia <[email protected]> wrote:

> > Based on this, I've made a patch.
> > based on COPY Synopsis: ON_ERROR 'error_action'
> > on_error 'null', the  keyword NULL should be single quoted.
> 
> As you mentioned, single quotation seems a little odd..
> 
> I'm not sure what is the best name and syntax for this feature, but 
> since current error_action are verbs('stop' and 'ignore'), I feel 'null' 
> might not be appropriate.

I am not in favour of using 'null' either, so I suggested to use
"set_to_null" or more generic syntax like  "set_to (col, val)" in my
previous post[1], although I'm not convinced what is the best either.

[1] https://www.postgresql.org/message-id/20240129172858.ccb6c77c3be95a295e2b2b44%40sraoss.co.jp

Regards,
Yugo Nagata

-- 
Yugo NAGATA <[email protected]>






^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-02-06 00:39  Kyotaro Horiguchi <[email protected]>
  parent: Yugo NAGATA <[email protected]>
  0 siblings, 1 reply; 29+ messages in thread

From: Kyotaro Horiguchi @ 2024-02-06 00:39 UTC (permalink / raw)
  To: [email protected]; +Cc: [email protected]; [email protected]; [email protected]; [email protected]

At Mon, 5 Feb 2024 17:22:56 +0900, Yugo NAGATA <[email protected]> wrote in 
> On Mon, 05 Feb 2024 11:28:59 +0900
> torikoshia <[email protected]> wrote:
> 
> > > Based on this, I've made a patch.
> > > based on COPY Synopsis: ON_ERROR 'error_action'
> > > on_error 'null', the  keyword NULL should be single quoted.
> > 
> > As you mentioned, single quotation seems a little odd..
> > 
> > I'm not sure what is the best name and syntax for this feature, but 
> > since current error_action are verbs('stop' and 'ignore'), I feel 'null' 
> > might not be appropriate.
> 
> I am not in favour of using 'null' either, so I suggested to use
> "set_to_null" or more generic syntax like  "set_to (col, val)" in my
> previous post[1], although I'm not convinced what is the best either.
> 
> [1] https://www.postgresql.org/message-id/20240129172858.ccb6c77c3be95a295e2b2b44%40sraoss.co.jp

Tom sugggested using a separate option, and I agree with the
suggestion. Taking this into consideration, I imagined something like
the following, for example.  Although I'm not sure we are actually
going to do whole-tuple replacement, the action name in this example
has the suffix '-column'.

COPY (on_error 'replace-colomn', replacement 'null') ..

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center






^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-02-06 07:46  Yugo NAGATA <[email protected]>
  parent: Kyotaro Horiguchi <[email protected]>
  0 siblings, 1 reply; 29+ messages in thread

From: Yugo NAGATA @ 2024-02-06 07:46 UTC (permalink / raw)
  To: Kyotaro Horiguchi <[email protected]>; +Cc: [email protected]; [email protected]; [email protected]; [email protected]

On Tue, 06 Feb 2024 09:39:09 +0900 (JST)
Kyotaro Horiguchi <[email protected]> wrote:

> At Mon, 5 Feb 2024 17:22:56 +0900, Yugo NAGATA <[email protected]> wrote in 
> > On Mon, 05 Feb 2024 11:28:59 +0900
> > torikoshia <[email protected]> wrote:
> > 
> > > > Based on this, I've made a patch.
> > > > based on COPY Synopsis: ON_ERROR 'error_action'
> > > > on_error 'null', the  keyword NULL should be single quoted.
> > > 
> > > As you mentioned, single quotation seems a little odd..
> > > 
> > > I'm not sure what is the best name and syntax for this feature, but 
> > > since current error_action are verbs('stop' and 'ignore'), I feel 'null' 
> > > might not be appropriate.
> > 
> > I am not in favour of using 'null' either, so I suggested to use
> > "set_to_null" or more generic syntax like  "set_to (col, val)" in my
> > previous post[1], although I'm not convinced what is the best either.
> > 
> > [1] https://www.postgresql.org/message-id/20240129172858.ccb6c77c3be95a295e2b2b44%40sraoss.co.jp
> 
> Tom sugggested using a separate option, and I agree with the
> suggestion. Taking this into consideration, I imagined something like
> the following, for example.  Although I'm not sure we are actually
> going to do whole-tuple replacement, the action name in this example
> has the suffix '-column'.
> 
> COPY (on_error 'replace-colomn', replacement 'null') ..

Thank you for your information. I've found a post[1] you mentioned, 
where adding a separate option for error log destination was suggested. 

Considering consistency with other options, adding a separate option
would be better if we want to specify a value to replace the invalid
value, without introducing a complex syntax that allows options with
more than one parameters. Maybe, if we allow to use values for the
replacement other than NULL, we have to also add a option to specify
a column (or a type)  for each replacement value. Or,  we may add a
option to specify a list of replacement values as many as the number of
columns, each of whose default is NULL.

Anyway, I prefer 'replace" (or 'set_to') to just 'null' as the option
value.

[1] https://www.postgresql.org/message-id/2070915.1705527477%40sss.pgh.pa.us

Regards,
Yugo Nagata


> regards.
> 
> -- 
> Kyotaro Horiguchi
> NTT Open Source Software Center


-- 
Yugo NAGATA <[email protected]>






^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-02-06 08:38  jian he <[email protected]>
  parent: Yugo NAGATA <[email protected]>
  0 siblings, 0 replies; 29+ messages in thread

From: jian he @ 2024-02-06 08:38 UTC (permalink / raw)
  To: Yugo NAGATA <[email protected]>; +Cc: Kyotaro Horiguchi <[email protected]>; [email protected]; [email protected]; [email protected]

On Tue, Feb 6, 2024 at 3:46 PM Yugo NAGATA <[email protected]> wrote:
>
> On Tue, 06 Feb 2024 09:39:09 +0900 (JST)
> Kyotaro Horiguchi <[email protected]> wrote:
>
> > At Mon, 5 Feb 2024 17:22:56 +0900, Yugo NAGATA <[email protected]> wrote in
> > > On Mon, 05 Feb 2024 11:28:59 +0900
> > > torikoshia <[email protected]> wrote:
> > >
> > > > > Based on this, I've made a patch.
> > > > > based on COPY Synopsis: ON_ERROR 'error_action'
> > > > > on_error 'null', the  keyword NULL should be single quoted.
> > > >
> > > > As you mentioned, single quotation seems a little odd..
> > > >
> > > > I'm not sure what is the best name and syntax for this feature, but
> > > > since current error_action are verbs('stop' and 'ignore'), I feel 'null'
> > > > might not be appropriate.
> > >
> > > I am not in favour of using 'null' either, so I suggested to use
> > > "set_to_null" or more generic syntax like  "set_to (col, val)" in my
> > > previous post[1], although I'm not convinced what is the best either.
> > >
> > > [1] https://www.postgresql.org/message-id/20240129172858.ccb6c77c3be95a295e2b2b44%40sraoss.co.jp
> >
> > Tom sugggested using a separate option, and I agree with the
> > suggestion. Taking this into consideration, I imagined something like
> > the following, for example.  Although I'm not sure we are actually
> > going to do whole-tuple replacement, the action name in this example
> > has the suffix '-column'.
> >
> > COPY (on_error 'replace-colomn', replacement 'null') ..
>
> Thank you for your information. I've found a post[1] you mentioned,
> where adding a separate option for error log destination was suggested.
>
> Considering consistency with other options, adding a separate option
> would be better if we want to specify a value to replace the invalid
> value, without introducing a complex syntax that allows options with
> more than one parameters. Maybe, if we allow to use values for the
> replacement other than NULL, we have to also add a option to specify
> a column (or a type)  for each replacement value. Or,  we may add a
> option to specify a list of replacement values as many as the number of
> columns, each of whose default is NULL.
>
> Anyway, I prefer 'replace" (or 'set_to') to just 'null' as the option
> value.
>

Let's say tabe t column (a,b,c)
if we support set_to_null(a,b), what should we do if column c has an error.
should we ignore this row or error out immediately?
also I am not sure it's doable to just extract columnList from the
function defGetCopyOnErrorChoice.

to make `COPY x from stdin (on_error set_to_null(a,b);` work,
we may need to refactor to gram.y, in a similar way we do force null

i am ok with
COPY x from stdin (on_error set_to_null);






^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-02-06 10:19  Yugo NAGATA <[email protected]>
  parent: jian he <[email protected]>
  0 siblings, 1 reply; 29+ messages in thread

From: Yugo NAGATA @ 2024-02-06 10:19 UTC (permalink / raw)
  To: jian he <[email protected]>; +Cc: torikoshia <[email protected]>; David G. Johnston <[email protected]>; PostgreSQL Hackers <[email protected]>

On Mon, 5 Feb 2024 14:26:46 +0800
jian he <[email protected]> wrote:

> On Mon, Feb 5, 2024 at 10:29 AM torikoshia <[email protected]> wrote:
> >
> > Hi,
> >
> > On 2024-02-03 15:22, jian he wrote:
> > > The idea of on_error is to tolerate errors, I think.
> > > if a column has a not null constraint, let it cannot be used with
> > > (on_error 'null')
> >
> > > +       /*
> > > +        * we can specify on_error 'null', but it can only apply to
> > > columns
> > > +        * don't have not null constraint.
> > > +       */
> > > +       if (att->attnotnull && cstate->opts.on_error ==
> > > COPY_ON_ERROR_NULL)
> > > +           ereport(ERROR,
> > > +                   (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
> > > +                    errmsg("copy on_error 'null' cannot be used with
> > > not null constraint column")));
> >
> > This means we cannot use ON_ERROR 'null' even when there is one column
> > which have NOT NULL constraint, i.e. primary key, right?
> > IMHO this is strong constraint and will decrease the opportunity to use
> > this feature.
> 
> I don't want to fail in the middle of bulk inserts,
> so I thought immediately erroring out would be a great idea.
> Let's see what other people think.

I also think this restriction is too strong because it is very
common that a table has a primary key, unless there is some way
to specify columns that can be set to NULL. Even when ON_ERROR
is specified, any constraint violation errors cannot be generally
ignored, so we cannot elimiate the posibility COPY FROM fails in
the middle due to invalid data, anyway.

Regards,
Yugo Nagata

-- 
Yugo NAGATA <[email protected]>






^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-02-12 00:00  jian he <[email protected]>
  parent: Yugo NAGATA <[email protected]>
  0 siblings, 1 reply; 29+ messages in thread

From: jian he @ 2024-02-12 00:00 UTC (permalink / raw)
  To: Yugo NAGATA <[email protected]>; +Cc: torikoshia <[email protected]>; David G. Johnston <[email protected]>; PostgreSQL Hackers <[email protected]>

attached v2.
syntax: `on_error set_to_null`
based on upthread discussion, now if you specified `on_error
set_to_null` and your column has `not
null` constraint, we convert the error field to null, so it may error
while bulk inserting for violating NOT NULL constraint.


Attachments:

  [text/x-patch] v2-0001-on_error-set_to_null.patch (8.3K, 2-v2-0001-on_error-set_to_null.patch)
  download | inline diff:
From c95bb7b7c072f510b9a60695714be21345f21591 Mon Sep 17 00:00:00 2001
From: jian he <[email protected]>
Date: Sat, 10 Feb 2024 15:08:41 +0800
Subject: [PATCH v2 1/1] on_error set_to_null

any data type conversion errors while COPY FROM will set that column value to be NULL.
discussion: https://www.postgresql.org/message-id/CAKFQuwawy1e6YR4S=j+y7pXqg_Dw1WBVrgvf=BP3d1_aSfe_+Q@mail.gmail.com
---
 doc/src/sgml/ref/copy.sgml           |  1 +
 src/backend/commands/copy.c          |  2 ++
 src/backend/commands/copyfrom.c      | 30 ++++++++++++++++++++++------
 src/backend/commands/copyfromparse.c | 28 ++++++++++++++++++++++++--
 src/include/commands/copy.h          |  1 +
 src/test/regress/expected/copy2.out  | 22 ++++++++++++++++++++
 src/test/regress/sql/copy2.sql       | 23 +++++++++++++++++++++
 7 files changed, 99 insertions(+), 8 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 55764fc1..d8b609b6 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -390,6 +390,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       An <replaceable class="parameter">error_action</replaceable> value of
       <literal>stop</literal> means fail the command, while
       <literal>ignore</literal> means discard the input row and continue with the next one.
+      <literal>set_to_null</literal> means the input value will set to <literal>null</literal> and continue with the next field.
       The default is <literal>stop</literal>.
      </para>
      <para>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index cc0786c6..9c7d6ebd 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -422,6 +422,8 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
 		return COPY_ON_ERROR_STOP;
 	if (pg_strcasecmp(sval, "ignore") == 0)
 		return COPY_ON_ERROR_IGNORE;
+	if (pg_strcasecmp(sval, "set_to_null") == 0)
+		return COPY_ON_ERROR_NULL;
 
 	ereport(ERROR,
 			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 41f6bc43..2a87bcf3 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1005,6 +1005,7 @@ CopyFrom(CopyFromState cstate)
 			 * information according to ON_ERROR.
 			 */
 			if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
+			{
 
 				/*
 				 * Just make ErrorSaveContext ready for the next NextCopyFrom.
@@ -1013,11 +1014,18 @@ CopyFrom(CopyFromState cstate)
 				 */
 				cstate->escontext->error_occurred = false;
 
-			/* Report that this tuple was skipped by the ON_ERROR clause */
-			pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED,
-										 ++skipped);
+				/* Report that this tuple was skipped by the ON_ERROR clause */
+				pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED,
+											++skipped);
 
-			continue;
+				continue;
+			}
+			/*
+			 * Just make ErrorSaveContext ready for the next NextCopyFrom.
+			 *
+			*/
+			if (cstate->opts.on_error == COPY_ON_ERROR_NULL)
+				cstate->escontext->error_occurred = false;
 		}
 
 		ExecStoreVirtualTuple(myslot);
@@ -1312,7 +1320,7 @@ CopyFrom(CopyFromState cstate)
 	/* Done, clean up */
 	error_context_stack = errcallback.previous;
 
-	if (cstate->opts.on_error != COPY_ON_ERROR_STOP &&
+	if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE &&
 		cstate->num_errors > 0)
 		ereport(NOTICE,
 				errmsg_plural("%llu row was skipped due to data type incompatibility",
@@ -1320,6 +1328,14 @@ CopyFrom(CopyFromState cstate)
 							  (unsigned long long) cstate->num_errors,
 							  (unsigned long long) cstate->num_errors));
 
+	if (cstate->opts.on_error == COPY_ON_ERROR_NULL &&
+		cstate->num_errors > 0)
+		ereport(NOTICE,
+				errmsg_plural("some columns of %llu rows, value was converted to NULL due to data type incompatibility",
+							  "some columns of %llu rows, value were converted to NULL due to data type incompatibility",
+							  (unsigned long long) cstate->num_errors,
+							  (unsigned long long) cstate->num_errors));
+
 	if (bistate != NULL)
 		FreeBulkInsertState(bistate);
 
@@ -1463,11 +1479,13 @@ BeginCopyFrom(ParseState *pstate,
 		cstate->escontext->error_occurred = false;
 
 		/*
-		 * Currently we only support COPY_ON_ERROR_IGNORE. We'll add other
+		 * Currently we only support COPY_ON_ERROR_IGNORE, COPY_ON_ERROR_NULL. We'll add other
 		 * options later
 		 */
 		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
 			cstate->escontext->details_wanted = false;
+		else if (cstate->opts.on_error == COPY_ON_ERROR_NULL)
+			cstate->escontext->details_wanted = false;
 	}
 	else
 		cstate->escontext = NULL;
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 90675636..9d77c3d1 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -873,6 +873,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 		int			fldct;
 		int			fieldno;
 		char	   *string;
+		bool		error_happened = false;
 
 		/* read raw fields in the next line */
 		if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
@@ -960,14 +961,37 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 											(Node *) cstate->escontext,
 											&values[m]))
 			{
-				cstate->num_errors++;
-				return true;
+				if (cstate->opts.on_error == COPY_ON_ERROR_NULL)
+				{
+
+					values[m] = (Datum) 0;
+					nulls[m] = true;
+					/* here, we need set error_occurred to false, so COPY will be continue */
+					cstate->escontext->error_occurred = false;
+
+					/* does any conversion error ever happened in all the fields */
+					if (!error_happened)
+						error_happened = true;
+				}
+				else
+				{
+					cstate->num_errors++;
+					return true;
+				}
+
 			}
 
 			cstate->cur_attname = NULL;
 			cstate->cur_attval = NULL;
 		}
 
+		/* update num_errors. one row with multiple errors field only count 1*/
+		if (error_happened)
+		{
+			cstate->num_errors++;
+			cstate->escontext->error_occurred = true;
+		}
+
 		Assert(fieldno == attr_count);
 	}
 	else
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index b3da3cb0..931fe09b 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -38,6 +38,7 @@ typedef enum CopyOnErrorChoice
 {
 	COPY_ON_ERROR_STOP = 0,		/* immediately throw errors, default */
 	COPY_ON_ERROR_IGNORE,		/* ignore errors */
+	COPY_ON_ERROR_NULL,			/* set error field to null */
 } CopyOnErrorChoice;
 
 /*
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 25c401ce..879da283 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -751,6 +751,28 @@ CONTEXT:  COPY check_ign_err, line 1: "1	{1}"
 COPY check_ign_err FROM STDIN WITH (on_error ignore);
 ERROR:  extra data after last expected column
 CONTEXT:  COPY check_ign_err, line 1: "1	{1}	3	abc"
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+NOTICE:  some columns of 3 rows, value were converted to NULL due to data type incompatibility
+\pset null NULL
+SELECT * FROM check_ign_err;
+  n   |  m  |  k   
+------+-----+------
+    1 | {1} | NULL
+    2 | {2} |    1
+    3 | {3} |    2
+    4 | {4} | NULL
+ NULL | {5} | NULL
+(5 rows)
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  missing data for column "k"
+CONTEXT:  COPY check_ign_err, line 1: "1,"
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  extra data after last expected column
+CONTEXT:  COPY check_ign_err, line 1: "1,{1},1,1"
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index b5e549e8..67bf45a7 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -534,6 +534,29 @@ COPY check_ign_err FROM STDIN WITH (on_error ignore);
 1	{1}	3	abc
 \.
 
+
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+1	{1}	a
+2	{2}	1
+3	{3}	2
+4	{4}	b
+a	{5}	c
+\.
+
+\pset null NULL
+SELECT * FROM check_ign_err;
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,
+\.
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,{1},1,1
+\.
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
-- 
2.34.1



^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-02-16 20:16  Jim Jones <[email protected]>
  parent: jian he <[email protected]>
  0 siblings, 1 reply; 29+ messages in thread

From: Jim Jones @ 2024-02-16 20:16 UTC (permalink / raw)
  To: jian he <[email protected]>; Yugo NAGATA <[email protected]>; +Cc: torikoshia <[email protected]>; David G. Johnston <[email protected]>; PostgreSQL Hackers <[email protected]>

Hi!

On 12.02.24 01:00, jian he wrote:
> attached v2.
> syntax: `on_error set_to_null`
> based on upthread discussion, now if you specified `on_error
> set_to_null` and your column has `not
> null` constraint, we convert the error field to null, so it may error
> while bulk inserting for violating NOT NULL constraint.
That's a very nice feature. Thanks for implementing it!

v2 applies cleanly and works as described.

\pset null '(NULL)'

CREATE TEMPORARY TABLE t1 (a int, b int);
COPY t1 (a,b) FROM STDIN;
1    a
2    1
3    2
4    b
a    c
\.
SELECT * FROM t1;

CONTEXT:  COPY t1, line 1, column b: "a"
 a | b
---+---
(0 rows)


CREATE TEMPORARY TABLE t2 (a int, b int);
COPY t2 (a,b) FROM STDIN WITH (on_error set_to_null);
1    a
2    1
3    2
4    b
a    c
\.
SELECT * FROM t2;

psql:test-copy-on_error-2.sql:12: NOTICE:  some columns of 3 rows, value
were converted to NULL due to data type incompatibility
COPY 5
   a    |   b    
--------+--------
      1 | (NULL)
      2 |      1
      3 |      2
      4 | (NULL)
 (NULL) | (NULL)
(5 rows)


I have one question though:

In case all columns of a record have been set to null due to data type
incompatibility, should we insert it at all? See t2 example above.
I'm not sure if these records would be of any use in the table. What do
you think?

Since the parameter is already called "set_to_null", maybe it is not
necessary to mention in the NOTICE message that the values have been set
to null.
Perhaps something like "XX records were only partially copied due to
data type incompatibility"


-- 
Jim







^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-02-16 20:31  David G. Johnston <[email protected]>
  parent: Jim Jones <[email protected]>
  0 siblings, 1 reply; 29+ messages in thread

From: David G. Johnston @ 2024-02-16 20:31 UTC (permalink / raw)
  To: Jim Jones <[email protected]>; +Cc: jian he <[email protected]>; Yugo NAGATA <[email protected]>; torikoshia <[email protected]>; PostgreSQL Hackers <[email protected]>

On Fri, Feb 16, 2024 at 1:16 PM Jim Jones <[email protected]> wrote:

> In case all columns of a record have been set to null due to data type
> incompatibility, should we insert it at all?


Yes.  In particular not all columns in the table need be specified in the
copy command so while the parsed input data is all nulls the record itself
may not be.

The system should allow the user to exclude rows with incomplete data by
ignoring a not null constraint violation.

In short we shouldn't judge non-usefulness and instead give tools to the
user to decide for themselves.

David J.


^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-02-16 23:05  Jim Jones <[email protected]>
  parent: David G. Johnston <[email protected]>
  0 siblings, 1 reply; 29+ messages in thread

From: Jim Jones @ 2024-02-16 23:05 UTC (permalink / raw)
  To: David G. Johnston <[email protected]>; +Cc: jian he <[email protected]>; Yugo NAGATA <[email protected]>; torikoshia <[email protected]>; PostgreSQL Hackers <[email protected]>



On 16.02.24 21:31, David G. Johnston wrote:
> Yes.  In particular not all columns in the table need be specified in
> the copy command so while the parsed input data is all nulls the
> record itself may not be.

Yeah, you have a point there.
I guess if users want to avoid it to happen they can rely on NOT NULL
constraints.

Thanks

-- 
Jim







^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-08-26 00:00  jian he <[email protected]>
  parent: Jim Jones <[email protected]>
  0 siblings, 1 reply; 29+ messages in thread

From: jian he @ 2024-08-26 00:00 UTC (permalink / raw)
  To: Jim Jones <[email protected]>; +Cc: David G. Johnston <[email protected]>; Yugo NAGATA <[email protected]>; torikoshia <[email protected]>; PostgreSQL Hackers <[email protected]>

hi all.
patch updated.
simplified the code a lot.

idea is same:
COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);

If the STDIN number of columns is the same as the target table, then
InputFunctionCallSafe
call failure will make that column values be null.


If the STDIN number of columns is not the same as the target table, then error
ERROR:  missing data for column \"%s\"
ERROR:  extra data after last expected column
which is status quo.


Attachments:

  [text/x-patch] v3-0001-on_error-set_to_null.patch (7.7K, 2-v3-0001-on_error-set_to_null.patch)
  download | inline diff:
From d697684b4dc1356172d93179b1e5e157893c3e54 Mon Sep 17 00:00:00 2001
From: jian he <[email protected]>
Date: Fri, 23 Aug 2024 22:26:44 +0800
Subject: [PATCH v3 1/1] on_error set_to_null

any data type conversion errors while COPY FROM will set that column value to be NULL.
discussion: https://postgr.es/m/CAKFQuwawy1e6YR4S=j+y7pXqg_Dw1WBVrgvf=BP3d1_aSfe_+Q@mail.gmail.com
---
 doc/src/sgml/ref/copy.sgml           |  1 +
 src/backend/commands/copy.c          |  2 ++
 src/backend/commands/copyfrom.c      |  8 ++++--
 src/backend/commands/copyfromparse.c | 11 ++++++++
 src/include/commands/copy.h          |  1 +
 src/test/regress/expected/copy2.out  | 39 ++++++++++++++++++++++++++
 src/test/regress/sql/copy2.sql       | 41 ++++++++++++++++++++++++++++
 7 files changed, 100 insertions(+), 3 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 1518af8a04..b6bdf45e7e 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -394,6 +394,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       An <replaceable class="parameter">error_action</replaceable> value of
       <literal>stop</literal> means fail the command, while
       <literal>ignore</literal> means discard the input row and continue with the next one.
+      <literal>set_to_null</literal> means the input value will set to <literal>null</literal> and continue with the next one.
       The default is <literal>stop</literal>.
      </para>
      <para>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 3bb579a3a4..e4bd310ae5 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -409,6 +409,8 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
 		return COPY_ON_ERROR_STOP;
 	if (pg_strcasecmp(sval, "ignore") == 0)
 		return COPY_ON_ERROR_IGNORE;
+	if (pg_strcasecmp(sval, "set_to_null") == 0)
+		return COPY_ON_ERROR_NULL;
 
 	ereport(ERROR,
 			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 2d3462913e..c1e58e49bc 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1319,7 +1319,7 @@ CopyFrom(CopyFromState cstate)
 	/* Done, clean up */
 	error_context_stack = errcallback.previous;
 
-	if (cstate->opts.on_error != COPY_ON_ERROR_STOP &&
+	if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE &&
 		cstate->num_errors > 0)
 		ereport(NOTICE,
 				errmsg_plural("%llu row was skipped due to data type incompatibility",
@@ -1471,11 +1471,13 @@ BeginCopyFrom(ParseState *pstate,
 		cstate->escontext->error_occurred = false;
 
 		/*
-		 * Currently we only support COPY_ON_ERROR_IGNORE. We'll add other
-		 * options later
+		 * Currently we only support COPY_ON_ERROR_IGNORE, COPY_ON_ERROR_NULL.
+		 * We'll add other options later
 		 */
 		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
 			cstate->escontext->details_wanted = false;
+		else if (cstate->opts.on_error == COPY_ON_ERROR_NULL)
+			cstate->escontext->details_wanted = false;
 	}
 	else
 		cstate->escontext = NULL;
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 7efcb89159..6fbe975b51 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -969,6 +969,17 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 			{
 				Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
 
+				if (cstate->opts.on_error == COPY_ON_ERROR_NULL)
+				{
+					values[m] = (Datum) 0;
+					nulls[m] = true;
+					/*
+					 * set error_occurred to false, so next
+					 * InputFunctionCallSafe call behave sane.
+					*/
+					cstate->escontext->error_occurred = false;
+					continue;
+				}
 				cstate->num_errors++;
 
 				if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 141fd48dc1..fa87232ed7 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -38,6 +38,7 @@ typedef enum CopyOnErrorChoice
 {
 	COPY_ON_ERROR_STOP = 0,		/* immediately throw errors, default */
 	COPY_ON_ERROR_IGNORE,		/* ignore errors */
+	COPY_ON_ERROR_NULL,			/* set error field to null */
 } CopyOnErrorChoice;
 
 /*
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index e913f683a6..4d23527106 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -753,6 +753,24 @@ CONTEXT:  COPY check_ign_err
 NOTICE:  skipping row due to data type incompatibility at line 8 for column k: "a"
 CONTEXT:  COPY check_ign_err
 NOTICE:  6 rows were skipped due to data type incompatibility
+create domain d_int_not_null as int not null check(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, c int not null, b int);
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+ERROR:  null value in column "c" of relation "t_on_error_null" violates not-null constraint
+DETAIL:  Failing row contains (11, null, 12).
+CONTEXT:  COPY t_on_error_null, line 1: "11	a	12"
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+select * from t_on_error_null;
+ a | c  | b  
+---+----+----
+   | 11 | 13
+   | 11 | 14
+(2 rows)
+
+drop table t_on_error_null;
+drop domain d_int_not_null;
 -- tests for on_error option with log_verbosity and null constraint via domain
 CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
 CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -789,6 +807,27 @@ CONTEXT:  COPY check_ign_err, line 1: "1	{1}"
 COPY check_ign_err FROM STDIN WITH (on_error ignore);
 ERROR:  extra data after last expected column
 CONTEXT:  COPY check_ign_err, line 1: "1	{1}	3	abc"
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+\pset null NULL
+SELECT * FROM check_ign_err;
+  n   |  m  |  k   
+------+-----+------
+    1 | {1} | NULL
+    2 | {2} |    1
+    3 | {3} |    2
+    4 | {4} | NULL
+ NULL | {5} | NULL
+(5 rows)
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  missing data for column "k"
+CONTEXT:  COPY check_ign_err, line 1, column m: ""
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  extra data after last expected column
+CONTEXT:  COPY check_ign_err, line 1: "1,{1},1,1"
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 8b14962194..4abc18a6db 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -526,6 +526,24 @@ a	{2}	2
 8	{8}	8
 \.
 
+create domain d_int_not_null as int not null check(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, c int not null, b int);
+
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+11	a	12
+\.
+
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+-1	11	13
+a	11	14
+\.
+
+select * from t_on_error_null;
+drop table t_on_error_null;
+drop domain d_int_not_null;
+
 -- tests for on_error option with log_verbosity and null constraint via domain
 CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
 CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -557,6 +575,29 @@ COPY check_ign_err FROM STDIN WITH (on_error ignore);
 1	{1}	3	abc
 \.
 
+
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+1	{1}	a
+2	{2}	1
+3	{3}	2
+4	{4}	b
+a	{5}	c
+\.
+
+\pset null NULL
+SELECT * FROM check_ign_err;
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,
+\.
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,{1},1,1
+\.
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
-- 
2.34.1



^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-09-09 14:33  Jim Jones <[email protected]>
  parent: jian he <[email protected]>
  0 siblings, 1 reply; 29+ messages in thread

From: Jim Jones @ 2024-09-09 14:33 UTC (permalink / raw)
  To: jian he <[email protected]>; +Cc: David G. Johnston <[email protected]>; Yugo NAGATA <[email protected]>; torikoshia <[email protected]>; PostgreSQL Hackers <[email protected]>


Hi there

On 26.08.24 02:00, jian he wrote:
> hi all.
> patch updated.
> simplified the code a lot.
>
> idea is same:
> COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
>
> If the STDIN number of columns is the same as the target table, then
> InputFunctionCallSafe
> call failure will make that column values be null.
>
>
> If the STDIN number of columns is not the same as the target table, then error
> ERROR:  missing data for column \"%s\"
> ERROR:  extra data after last expected column
> which is status quo.

I wanted to give it another try, but the patch does not apply ...

$ git apply ~/patches/copy_on_error/v3-0001-on_error-set_to_null.patch -v
Checking patch doc/src/sgml/ref/copy.sgml...
Checking patch src/backend/commands/copy.c...
Checking patch src/backend/commands/copyfrom.c...
Checking patch src/backend/commands/copyfromparse.c...
Checking patch src/include/commands/copy.h...
Checking patch src/test/regress/expected/copy2.out...
error: while searching for:
NOTICE:  skipping row due to data type incompatibility at line 8 for
column k: "a"
CONTEXT:  COPY check_ign_err
NOTICE:  6 rows were skipped due to data type incompatibility
-- tests for on_error option with log_verbosity and null constraint via
domain
CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);

error: patch failed: src/test/regress/expected/copy2.out:753
error: src/test/regress/expected/copy2.out: patch does not apply
Checking patch src/test/regress/sql/copy2.sql...


-- 
Jim







^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-09-12 10:13  jian he <[email protected]>
  parent: Jim Jones <[email protected]>
  0 siblings, 2 replies; 29+ messages in thread

From: jian he @ 2024-09-12 10:13 UTC (permalink / raw)
  To: Jim Jones <[email protected]>; +Cc: David G. Johnston <[email protected]>; Yugo NAGATA <[email protected]>; torikoshia <[email protected]>; PostgreSQL Hackers <[email protected]>

On Mon, Sep 9, 2024 at 10:34 PM Jim Jones <[email protected]> wrote:
>
>
> Hi there
>
> On 26.08.24 02:00, jian he wrote:
> > hi all.
> > patch updated.
> > simplified the code a lot.
> >
> > idea is same:
> > COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
> >
> > If the STDIN number of columns is the same as the target table, then
> > InputFunctionCallSafe
> > call failure will make that column values be null.
> >
> >
> > If the STDIN number of columns is not the same as the target table, then error
> > ERROR:  missing data for column \"%s\"
> > ERROR:  extra data after last expected column
> > which is status quo.
>
> I wanted to give it another try, but the patch does not apply ...
>

here we are.
please check the attached file.


Attachments:

  [text/x-patch] v4-0001-on_error-set_to_null.patch (11.1K, 2-v4-0001-on_error-set_to_null.patch)
  download | inline diff:
From 3d6b3d8b0393b5bc4950e85c40c69c0da46c8035 Mon Sep 17 00:00:00 2001
From: jian he <[email protected]>
Date: Thu, 12 Sep 2024 17:07:02 +0800
Subject: [PATCH v4 1/1] on_error set_to_null

extent "on_error action", introduce new option: on_error set_to_null.

Due to current grammar, we cannot use "on_error null",
so I choose on_error set_to_null.

any data type conversion errors while the COPY FROM process will set that column value to be NULL.
this will only work with COPY FROM and non-binary format.

However this will respect the not-null constraint, meaning, if you actually converted error to null,
but the column has not-null constraint, not-null constraint violation ERROR will be raised.

discussion: https://postgr.es/m/CAKFQuwawy1e6YR4S=j+y7pXqg_Dw1WBVrgvf=BP3d1_aSfe_+Q@mail.gmail.com
---
 doc/src/sgml/ref/copy.sgml           |  1 +
 src/backend/commands/copy.c          |  4 ++-
 src/backend/commands/copyfrom.c      |  9 ++---
 src/backend/commands/copyfromparse.c | 11 +++++++
 src/include/commands/copy.h          |  1 +
 src/test/regress/expected/copy2.out  | 49 ++++++++++++++++++++++++++++
 src/test/regress/sql/copy2.sql       | 44 +++++++++++++++++++++++++
 7 files changed, 114 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 1518af8a04..b6bdf45e7e 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -394,6 +394,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       An <replaceable class="parameter">error_action</replaceable> value of
       <literal>stop</literal> means fail the command, while
       <literal>ignore</literal> means discard the input row and continue with the next one.
+      <literal>set_to_null</literal> means the input value will set to <literal>null</literal> and continue with the next one.
       The default is <literal>stop</literal>.
      </para>
      <para>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 3bb579a3a4..05b152a090 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -403,12 +403,14 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
 				 parser_errposition(pstate, def->location)));
 
 	/*
-	 * Allow "stop", or "ignore" values.
+	 * Allow "stop", or "ignore", "set_to_null" values.
 	 */
 	if (pg_strcasecmp(sval, "stop") == 0)
 		return COPY_ON_ERROR_STOP;
 	if (pg_strcasecmp(sval, "ignore") == 0)
 		return COPY_ON_ERROR_IGNORE;
+	if (pg_strcasecmp(sval, "set_to_null") == 0)
+		return COPY_ON_ERROR_NULL;
 
 	ereport(ERROR,
 			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 2d3462913e..1669fac444 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1319,7 +1319,7 @@ CopyFrom(CopyFromState cstate)
 	/* Done, clean up */
 	error_context_stack = errcallback.previous;
 
-	if (cstate->opts.on_error != COPY_ON_ERROR_STOP &&
+	if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE &&
 		cstate->num_errors > 0)
 		ereport(NOTICE,
 				errmsg_plural("%llu row was skipped due to data type incompatibility",
@@ -1471,10 +1471,11 @@ BeginCopyFrom(ParseState *pstate,
 		cstate->escontext->error_occurred = false;
 
 		/*
-		 * Currently we only support COPY_ON_ERROR_IGNORE. We'll add other
-		 * options later
+		 * Currently we only support COPY_ON_ERROR_IGNORE, COPY_ON_ERROR_NULL.
+		 * We'll add other options later
 		 */
-		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
+		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE ||
+			cstate->opts.on_error == COPY_ON_ERROR_NULL)
 			cstate->escontext->details_wanted = false;
 	}
 	else
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 97a4c387a3..3fe32b76ac 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -969,6 +969,17 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 			{
 				Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
 
+				if (cstate->opts.on_error == COPY_ON_ERROR_NULL)
+				{
+					values[m] = (Datum) 0;
+					nulls[m] = true;
+					/*
+					 * set error_occurred to false, so next
+					 * InputFunctionCallSafe call behave sane.
+					*/
+					cstate->escontext->error_occurred = false;
+					continue;
+				}
 				cstate->num_errors++;
 
 				if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 141fd48dc1..fa87232ed7 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -38,6 +38,7 @@ typedef enum CopyOnErrorChoice
 {
 	COPY_ON_ERROR_STOP = 0,		/* immediately throw errors, default */
 	COPY_ON_ERROR_IGNORE,		/* ignore errors */
+	COPY_ON_ERROR_NULL,			/* set error field to null */
 } CopyOnErrorChoice;
 
 /*
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 61a19cdc4c..b92a5771ff 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -81,6 +81,10 @@ COPY x from stdin (on_error ignore, on_error ignore);
 ERROR:  conflicting or redundant options
 LINE 1: COPY x from stdin (on_error ignore, on_error ignore);
                                             ^
+COPY x from stdin (on_error set_to_null, on_error set_to_null);
+ERROR:  conflicting or redundant options
+LINE 1: COPY x from stdin (on_error set_to_null, on_error set_to_nul...
+                                                 ^
 COPY x from stdin (log_verbosity default, log_verbosity verbose);
 ERROR:  conflicting or redundant options
 LINE 1: COPY x from stdin (log_verbosity default, log_verbosity verb...
@@ -92,6 +96,8 @@ COPY x to stdin (format BINARY, null 'x');
 ERROR:  cannot specify NULL in BINARY mode
 COPY x from stdin (format BINARY, on_error ignore);
 ERROR:  only ON_ERROR STOP is allowed in BINARY mode
+COPY x from stdin (format BINARY, on_error set_to_null);
+ERROR:  only ON_ERROR STOP is allowed in BINARY mode
 COPY x from stdin (on_error unsupported);
 ERROR:  COPY ON_ERROR "unsupported" not recognized
 LINE 1: COPY x from stdin (on_error unsupported);
@@ -112,6 +118,10 @@ COPY x to stdin (format BINARY, on_error unsupported);
 ERROR:  COPY ON_ERROR cannot be used with COPY TO
 LINE 1: COPY x to stdin (format BINARY, on_error unsupported);
                                         ^
+COPY x to stdin (on_error set_to_null);
+ERROR:  COPY ON_ERROR cannot be used with COPY TO
+LINE 1: COPY x to stdin (on_error set_to_null);
+                         ^
 COPY x to stdout (log_verbosity unsupported);
 ERROR:  COPY LOG_VERBOSITY "unsupported" not recognized
 LINE 1: COPY x to stdout (log_verbosity unsupported);
@@ -753,6 +763,24 @@ CONTEXT:  COPY check_ign_err
 NOTICE:  skipping row due to data type incompatibility at line 8 for column "k": "a"
 CONTEXT:  COPY check_ign_err
 NOTICE:  6 rows were skipped due to data type incompatibility
+create domain d_int_not_null as int not null check(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, c int not null, b int);
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+ERROR:  null value in column "c" of relation "t_on_error_null" violates not-null constraint
+DETAIL:  Failing row contains (11, null, 12).
+CONTEXT:  COPY t_on_error_null, line 1: "11	a	12"
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+select * from t_on_error_null;
+ a | c  | b  
+---+----+----
+   | 11 | 13
+   | 11 | 14
+(2 rows)
+
+drop table t_on_error_null;
+drop domain d_int_not_null;
 -- tests for on_error option with log_verbosity and null constraint via domain
 CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
 CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -789,6 +817,27 @@ CONTEXT:  COPY check_ign_err, line 1: "1	{1}"
 COPY check_ign_err FROM STDIN WITH (on_error ignore);
 ERROR:  extra data after last expected column
 CONTEXT:  COPY check_ign_err, line 1: "1	{1}	3	abc"
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+\pset null NULL
+SELECT * FROM check_ign_err;
+  n   |  m  |  k   
+------+-----+------
+    1 | {1} | NULL
+    2 | {2} |    1
+    3 | {3} |    2
+    4 | {4} | NULL
+ NULL | {5} | NULL
+(5 rows)
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  missing data for column "k"
+CONTEXT:  COPY check_ign_err, line 1, column m: ""
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  extra data after last expected column
+CONTEXT:  COPY check_ign_err, line 1: "1,{1},1,1"
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 8b14962194..1144822768 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -67,12 +67,14 @@ COPY x from stdin (force_null (a), force_null (b));
 COPY x from stdin (convert_selectively (a), convert_selectively (b));
 COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii');
 COPY x from stdin (on_error ignore, on_error ignore);
+COPY x from stdin (on_error set_to_null, on_error set_to_null);
 COPY x from stdin (log_verbosity default, log_verbosity verbose);
 
 -- incorrect options
 COPY x to stdin (format BINARY, delimiter ',');
 COPY x to stdin (format BINARY, null 'x');
 COPY x from stdin (format BINARY, on_error ignore);
+COPY x from stdin (format BINARY, on_error set_to_null);
 COPY x from stdin (on_error unsupported);
 COPY x to stdin (format TEXT, force_quote(a));
 COPY x from stdin (format CSV, force_quote(a));
@@ -81,6 +83,7 @@ COPY x to stdin (format CSV, force_not_null(a));
 COPY x to stdout (format TEXT, force_null(a));
 COPY x to stdin (format CSV, force_null(a));
 COPY x to stdin (format BINARY, on_error unsupported);
+COPY x to stdin (on_error set_to_null);
 COPY x to stdout (log_verbosity unsupported);
 
 -- too many columns in column list: should fail
@@ -526,6 +529,24 @@ a	{2}	2
 8	{8}	8
 \.
 
+create domain d_int_not_null as int not null check(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, c int not null, b int);
+
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+11	a	12
+\.
+
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+-1	11	13
+a	11	14
+\.
+
+select * from t_on_error_null;
+drop table t_on_error_null;
+drop domain d_int_not_null;
+
 -- tests for on_error option with log_verbosity and null constraint via domain
 CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
 CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -557,6 +578,29 @@ COPY check_ign_err FROM STDIN WITH (on_error ignore);
 1	{1}	3	abc
 \.
 
+
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+1	{1}	a
+2	{2}	1
+3	{3}	2
+4	{4}	b
+a	{5}	c
+\.
+
+\pset null NULL
+SELECT * FROM check_ign_err;
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,
+\.
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,{1},1,1
+\.
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;

base-commit: 00c76cf21c42c17e60e73a87dea0d1b4e234d9da
-- 
2.34.1



^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-09-16 10:38  Jim Jones <[email protected]>
  parent: jian he <[email protected]>
  1 sibling, 0 replies; 29+ messages in thread

From: Jim Jones @ 2024-09-16 10:38 UTC (permalink / raw)
  To: jian he <[email protected]>; +Cc: David G. Johnston <[email protected]>; Yugo NAGATA <[email protected]>; torikoshia <[email protected]>; PostgreSQL Hackers <[email protected]>



On 12.09.24 12:13, jian he wrote:
> please check the attached file.

v4 applies cleanly, it works as expected, and all tests pass.

postgres=# \pset null '(NULL)'
Null display is "(NULL)".

postgres=# CREATE TEMPORARY TABLE t2 (a int, b int);
CREATE TABLE

postgres=# COPY t2 (a,b) FROM STDIN WITH (on_error set_to_null, format csv);
Enter data to be copied followed by a newline.
End with a backslash and a period on a line by itself, or an EOF signal.
>> 1,a
>> 2,1
>> 3,2
>> 4,b
>> a,c
>> \.
COPY 5

postgres=# SELECT * FROM t2;
   a    |   b    
--------+--------
      1 | (NULL)
      2 |      1
      3 |      2
      4 | (NULL)
 (NULL) | (NULL)
(5 rows)


Perhaps small changes in the docs:

<literal>set_to_null</literal> means the input value will set to
<literal>null</literal> and continue with the next one.

"will set" -> "will be set"
"and continue with" -> "and will continue with"

Other than that, LGTM.

Thanks!

-- 
Jim







^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-10-21 09:30  Kirill Reshke <[email protected]>
  parent: jian he <[email protected]>
  1 sibling, 1 reply; 29+ messages in thread

From: Kirill Reshke @ 2024-10-21 09:30 UTC (permalink / raw)
  To: jian he <[email protected]>; +Cc: Jim Jones <[email protected]>; David G. Johnston <[email protected]>; Yugo NAGATA <[email protected]>; torikoshia <[email protected]>; PostgreSQL Hackers <[email protected]>

Hi!

On Thu, 12 Sept 2024 at 15:13, jian he <[email protected]> wrote:
>
> On Mon, Sep 9, 2024 at 10:34 PM Jim Jones <[email protected]> wrote:
> >
> >
> > Hi there
> >
> > On 26.08.24 02:00, jian he wrote:
> > > hi all.
> > > patch updated.
> > > simplified the code a lot.
> > >
> > > idea is same:
> > > COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
> > >
> > > If the STDIN number of columns is the same as the target table, then
> > > InputFunctionCallSafe
> > > call failure will make that column values be null.
> > >
> > >
> > > If the STDIN number of columns is not the same as the target table, then error
> > > ERROR:  missing data for column \"%s\"
> > > ERROR:  extra data after last expected column
> > > which is status quo.
> >
> > I wanted to give it another try, but the patch does not apply ...
> >
>
> here we are.
> please check the attached file.

Hi!
v4 no longer applies. It now conflicts with
e7834a1a251d4a28245377f383ff20a657ba8262.
Also, there were review comments.

So, I decided to rebase.

Review comments from [1] applied partially. I didn't do "and continue
with" -> "and will continue with" substitution as suggested, because
the first options are used for `ignore` doc one lines above. So, I
just don't know how to change this correctly. We definitely don't want
two separate forms of saying the same in 2 consecutive lines.

I did small changes:
1) added
`-- tests for set_to_null`
 option in the test script akin to 4ac2a9beceb10d44806d2cf157d5a931bdade39e

2) I rephrased
Allow "stop", or "ignore", "set_to_null" values
to
Allow "stop", "ignore", "set_to_null" values

PFA.

[1] https://www.postgresql.org/message-id/b26e9c6c-75bf-45ea-8aea-346dda3bd445%40uni-muenster.de
-- 
Best regards,
Kirill Reshke


Attachments:

  [application/octet-stream] v5-0001-on_error-set_to_null.patch (11.2K, 2-v5-0001-on_error-set_to_null.patch)
  download | inline diff:
From 44406d426f421508729b08b8014eccbd6ec3e6c9 Mon Sep 17 00:00:00 2001
From: jian he <[email protected]>
Date: Thu, 12 Sep 2024 17:07:02 +0800
Subject: [PATCH v5] on_error set_to_null

extent "on_error action", introduce new option: on_error set_to_null.

Due to current grammar, we cannot use "on_error null",
so I choose on_error set_to_null.

any data type conversion errors while the COPY FROM process will set that column value to be NULL.
this will only work with COPY FROM and non-binary format.

However this will respect the not-null constraint, meaning, if you actually converted error to null,
but the column has not-null constraint, not-null constraint violation ERROR will be raised.

discussion: https://postgr.es/m/CAKFQuwawy1e6YR4S=j+y7pXqg_Dw1WBVrgvf=BP3d1_aSfe_+Q@mail.gmail.com
---
 doc/src/sgml/ref/copy.sgml           |  1 +
 src/backend/commands/copy.c          |  4 ++-
 src/backend/commands/copyfrom.c      |  9 ++---
 src/backend/commands/copyfromparse.c | 11 ++++++
 src/include/commands/copy.h          |  1 +
 src/test/regress/expected/copy2.out  | 50 ++++++++++++++++++++++++++++
 src/test/regress/sql/copy2.sql       | 44 ++++++++++++++++++++++++
 7 files changed, 115 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 8394402f096..7f2b0e40662 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -396,6 +396,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       An <replaceable class="parameter">error_action</replaceable> value of
       <literal>stop</literal> means fail the command, while
       <literal>ignore</literal> means discard the input row and continue with the next one.
+      <literal>set_to_null</literal> means the input value will be set to <literal>null</literal> and continue with the next one.
       The default is <literal>stop</literal>.
      </para>
      <para>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 3485ba8663f..605c7c60f19 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -403,12 +403,14 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
 				 parser_errposition(pstate, def->location)));
 
 	/*
-	 * Allow "stop", or "ignore" values.
+	 * Allow "stop", "ignore", "set_to_null" values.
 	 */
 	if (pg_strcasecmp(sval, "stop") == 0)
 		return COPY_ON_ERROR_STOP;
 	if (pg_strcasecmp(sval, "ignore") == 0)
 		return COPY_ON_ERROR_IGNORE;
+	if (pg_strcasecmp(sval, "set_to_null") == 0)
+		return COPY_ON_ERROR_NULL;
 
 	ereport(ERROR,
 			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 07cbd5d22b8..dde30989009 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1321,7 +1321,7 @@ CopyFrom(CopyFromState cstate)
 	/* Done, clean up */
 	error_context_stack = errcallback.previous;
 
-	if (cstate->opts.on_error != COPY_ON_ERROR_STOP &&
+	if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE &&
 		cstate->num_errors > 0 &&
 		cstate->opts.log_verbosity >= COPY_LOG_VERBOSITY_DEFAULT)
 		ereport(NOTICE,
@@ -1474,10 +1474,11 @@ BeginCopyFrom(ParseState *pstate,
 		cstate->escontext->error_occurred = false;
 
 		/*
-		 * Currently we only support COPY_ON_ERROR_IGNORE. We'll add other
-		 * options later
+		 * Currently we only support COPY_ON_ERROR_IGNORE, COPY_ON_ERROR_NULL.
+		 * We'll add other options later
 		 */
-		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
+		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE ||
+			cstate->opts.on_error == COPY_ON_ERROR_NULL)
 			cstate->escontext->details_wanted = false;
 	}
 	else
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 654fecb1b14..8ab42dbceee 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -961,6 +961,17 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 			{
 				Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
 
+				if (cstate->opts.on_error == COPY_ON_ERROR_NULL)
+				{
+					values[m] = (Datum) 0;
+					nulls[m] = true;
+					/*
+					 * set error_occurred to false, so next
+					 * InputFunctionCallSafe call behave sane.
+					*/
+					cstate->escontext->error_occurred = false;
+					continue;
+				}
 				cstate->num_errors++;
 
 				if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 4002a7f5382..051ca12d107 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -38,6 +38,7 @@ typedef enum CopyOnErrorChoice
 {
 	COPY_ON_ERROR_STOP = 0,		/* immediately throw errors, default */
 	COPY_ON_ERROR_IGNORE,		/* ignore errors */
+	COPY_ON_ERROR_NULL,			/* set error field to null */
 } CopyOnErrorChoice;
 
 /*
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 64ea33aeae8..55ece18f9bf 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -81,6 +81,10 @@ COPY x from stdin (on_error ignore, on_error ignore);
 ERROR:  conflicting or redundant options
 LINE 1: COPY x from stdin (on_error ignore, on_error ignore);
                                             ^
+COPY x from stdin (on_error set_to_null, on_error set_to_null);
+ERROR:  conflicting or redundant options
+LINE 1: COPY x from stdin (on_error set_to_null, on_error set_to_nul...
+                                                 ^
 COPY x from stdin (log_verbosity default, log_verbosity verbose);
 ERROR:  conflicting or redundant options
 LINE 1: COPY x from stdin (log_verbosity default, log_verbosity verb...
@@ -92,6 +96,8 @@ COPY x from stdin (format BINARY, null 'x');
 ERROR:  cannot specify NULL in BINARY mode
 COPY x from stdin (format BINARY, on_error ignore);
 ERROR:  only ON_ERROR STOP is allowed in BINARY mode
+COPY x from stdin (format BINARY, on_error set_to_null);
+ERROR:  only ON_ERROR STOP is allowed in BINARY mode
 COPY x from stdin (on_error unsupported);
 ERROR:  COPY ON_ERROR "unsupported" not recognized
 LINE 1: COPY x from stdin (on_error unsupported);
@@ -124,6 +130,10 @@ COPY x to stdout (format BINARY, on_error unsupported);
 ERROR:  COPY ON_ERROR cannot be used with COPY TO
 LINE 1: COPY x to stdout (format BINARY, on_error unsupported);
                                          ^
+COPY x to stdin (on_error set_to_null);
+ERROR:  COPY ON_ERROR cannot be used with COPY TO
+LINE 1: COPY x to stdin (on_error set_to_null);
+                         ^
 COPY x from stdin (log_verbosity unsupported);
 ERROR:  COPY LOG_VERBOSITY "unsupported" not recognized
 LINE 1: COPY x from stdin (log_verbosity unsupported);
@@ -769,6 +779,24 @@ CONTEXT:  COPY check_ign_err
 NOTICE:  skipping row due to data type incompatibility at line 8 for column "k": "a"
 CONTEXT:  COPY check_ign_err
 NOTICE:  6 rows were skipped due to data type incompatibility
+create domain d_int_not_null as int not null check(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, c int not null, b int);
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+ERROR:  null value in column "c" of relation "t_on_error_null" violates not-null constraint
+DETAIL:  Failing row contains (11, null, 12).
+CONTEXT:  COPY t_on_error_null, line 1: "11	a	12"
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+select * from t_on_error_null;
+ a | c  | b  
+---+----+----
+   | 11 | 13
+   | 11 | 14
+(2 rows)
+
+drop table t_on_error_null;
+drop domain d_int_not_null;
 -- tests for on_error option with log_verbosity and null constraint via domain
 CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
 CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -813,6 +841,28 @@ ERROR:  skipped more than REJECT_LIMIT (3) rows due to data type incompatibility
 CONTEXT:  COPY check_ign_err, line 5, column n: ""
 COPY check_ign_err FROM STDIN WITH (on_error ignore, reject_limit 4);
 NOTICE:  4 rows were skipped due to data type incompatibility
+-- tests for on_error set_to_null option
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+\pset null NULL
+SELECT * FROM check_ign_err;
+  n   |  m  |  k   
+------+-----+------
+    1 | {1} | NULL
+    2 | {2} |    1
+    3 | {3} |    2
+    4 | {4} | NULL
+ NULL | {5} | NULL
+(5 rows)
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  missing data for column "k"
+CONTEXT:  COPY check_ign_err, line 1, column m: ""
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  extra data after last expected column
+CONTEXT:  COPY check_ign_err, line 1: "1,{1},1,1"
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 45273557ce0..8d974f4d9a9 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -67,12 +67,14 @@ COPY x from stdin (force_null (a), force_null (b));
 COPY x from stdin (convert_selectively (a), convert_selectively (b));
 COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii');
 COPY x from stdin (on_error ignore, on_error ignore);
+COPY x from stdin (on_error set_to_null, on_error set_to_null);
 COPY x from stdin (log_verbosity default, log_verbosity verbose);
 
 -- incorrect options
 COPY x from stdin (format BINARY, delimiter ',');
 COPY x from stdin (format BINARY, null 'x');
 COPY x from stdin (format BINARY, on_error ignore);
+COPY x from stdin (format BINARY, on_error set_to_null);
 COPY x from stdin (on_error unsupported);
 COPY x from stdin (format TEXT, force_quote(a));
 COPY x from stdin (format TEXT, force_quote *);
@@ -87,6 +89,7 @@ COPY x from stdin (format TEXT, force_null *);
 COPY x to stdout (format CSV, force_null(a));
 COPY x to stdout (format CSV, force_null *);
 COPY x to stdout (format BINARY, on_error unsupported);
+COPY x to stdin (on_error set_to_null);
 COPY x from stdin (log_verbosity unsupported);
 COPY x from stdin with (reject_limit 1);
 COPY x from stdin with (on_error ignore, reject_limit 0);
@@ -534,6 +537,24 @@ a	{2}	2
 8	{8}	8
 \.
 
+create domain d_int_not_null as int not null check(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, c int not null, b int);
+
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+11	a	12
+\.
+
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+-1	11	13
+a	11	14
+\.
+
+select * from t_on_error_null;
+drop table t_on_error_null;
+drop domain d_int_not_null;
+
 -- tests for on_error option with log_verbosity and null constraint via domain
 CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
 CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -588,6 +609,29 @@ a	{7}	7
 10	{10}	10
 \.
 
+-- tests for on_error set_to_null option
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+1	{1}	a
+2	{2}	1
+3	{3}	2
+4	{4}	b
+a	{5}	c
+\.
+
+\pset null NULL
+SELECT * FROM check_ign_err;
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,
+\.
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,{1},1,1
+\.
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
-- 
2.34.1



^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-10-21 12:39  Fujii Masao <[email protected]>
  parent: Kirill Reshke <[email protected]>
  0 siblings, 2 replies; 29+ messages in thread

From: Fujii Masao @ 2024-10-21 12:39 UTC (permalink / raw)
  To: Kirill Reshke <[email protected]>; jian he <[email protected]>; +Cc: Jim Jones <[email protected]>; David G. Johnston <[email protected]>; Yugo NAGATA <[email protected]>; torikoshia <[email protected]>; PostgreSQL Hackers <[email protected]>



On 2024/10/21 18:30, Kirill Reshke wrote:
> v4 no longer applies. It now conflicts with
> e7834a1a251d4a28245377f383ff20a657ba8262.
> Also, there were review comments.
> 
> So, I decided to rebase.

Thanks for the patch! Here are my review comments:

I noticed that on_error=set_to_null does not trigger NOTICE messages for rows
and columns with errors. It's "unexpected" thing for columns to be silently
replaced with NULL due to on_error=set_to_null. So, similar to on_error=ignore,
there should be NOTICE messages indicating which input records had columns
set to NULL because of data type incompatibility. Without these messages,
users might not realize that some columns were set to NULL.


How should on_error=set_to_null behave when reject_limit is set? It seems
intuitive to trigger an error if the number of rows with columns' data type
issues exceeds reject_limit, similar to on_error=ignore. This is open to discussion.


psql's tab-completion should be updated to include SET_TO_NULL.


        An <replaceable class="parameter">error_action</replaceable> value of
        <literal>stop</literal> means fail the command, while
        <literal>ignore</literal> means discard the input row and continue with the next one.
+      <literal>set_to_null</literal> means the input value will be set to <literal>null</literal> and continue with the next one.

How about merging these two descriptions to one and updating it to the following?

-------------------
An error_action value of stop means fail the command, ignore means discard the input
row and continue with the next one, and set_to_null means replace columns with invalid
input values with NULL and move to the next row.
-------------------


       The <literal>ignore</literal> option is applicable only for <command>COPY FROM</command>

This should be "... ignore and set_to_null options are ..."?

Regards,

-- 
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION







^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-10-25 21:03  Kirill Reshke <[email protected]>
  parent: Fujii Masao <[email protected]>
  1 sibling, 1 reply; 29+ messages in thread

From: Kirill Reshke @ 2024-10-25 21:03 UTC (permalink / raw)
  To: Fujii Masao <[email protected]>; +Cc: jian he <[email protected]>; Jim Jones <[email protected]>; David G. Johnston <[email protected]>; Yugo NAGATA <[email protected]>; torikoshia <[email protected]>; PostgreSQL Hackers <[email protected]>

Hi!

On Mon, 21 Oct 2024 at 17:39, Fujii Masao <[email protected]> wrote:
>
>
>
> On 2024/10/21 18:30, Kirill Reshke wrote:
> > v4 no longer applies. It now conflicts with
> > e7834a1a251d4a28245377f383ff20a657ba8262.
> > Also, there were review comments.
> >
> > So, I decided to rebase.
>
> Thanks for the patch! Here are my review comments:
>
> I noticed that on_error=set_to_null does not trigger NOTICE messages for rows
> and columns with errors. It's "unexpected" thing for columns to be silently
> replaced with NULL due to on_error=set_to_null. So, similar to on_error=ignore,
> there should be NOTICE messages indicating which input records had columns
> set to NULL because of data type incompatibility. Without these messages,
> users might not realize that some columns were set to NULL.

Nice catch. That is a feature introduced by
f5a227895e178bf528b18f82bbe554435fb3e64f.

>
> How should on_error=set_to_null behave when reject_limit is set? It seems
> intuitive to trigger an error if the number of rows with columns' data type
> issues exceeds reject_limit, similar to on_error=ignore. This is open to discussion.

Ok, let's discuss. My first suggestion was:

when the REJECT LIMIT is set to some non-zero number and the number of
row NULL replacements exceeds the limit, is it OK to fail. Because
there WAS errors, and we should not tolerate more than $limit errors .
I do find this behavior to be consistent.
But what if we don't set a REJECT LIMIT, it is sane to do all
replacements, as if REJECT LIMIT is inf. But our REJECT LIMIT is zero
(not set).
So, we ignore zero REJECT LIMIT if set_to_null is set.

But while I was trying to implement that, I realized that I don't
understand v4 of this patch. My misunderstanding is about
`t_on_error_null` tests. We are allowed to insert a NULL value for the
first column of t_on_error_null using COPY ON_ERROR SET_TO_NULL. Why
do we do that? My thought is we should try to execute
InputFunctionCallSafe with NULL value  (i mean, here [1]) for the
column after we failed to insert the input value. And, if this second
call is successful, we do replacement, otherwise we count the row as
erroneous.

>
> psql's tab-completion should be updated to include SET_TO_NULL.

Agreed.

>
>
>         An <replaceable class="parameter">error_action</replaceable> value of
>         <literal>stop</literal> means fail the command, while
>         <literal>ignore</literal> means discard the input row and continue with the next one.
> +      <literal>set_to_null</literal> means the input value will be set to <literal>null</literal> and continue with the next one.
>
> How about merging these two descriptions to one and updating it to the following?
>
> -------------------
> An error_action value of stop means fail the command, ignore means discard the input
> row and continue with the next one, and set_to_null means replace columns with invalid
> input values with NULL and move to the next row.
> -------------------
>

Hm, good catch. Applied almost as you suggested. I did tweak this
"replace columns with invalid input values with " into "replace
columns containing erroneous input values with". Is that OK?

>        The <literal>ignore</literal> option is applicable only for <command>COPY FROM</command>
>
> This should be "... ignore and set_to_null options are ..."?

Sure!

> Regards,
>
> --
> Fujii Masao
> Advanced Computing Technology Center
> Research and Development Headquarters
> NTT DATA CORPORATION
>

Here I sent v6 where review comments, except set_to_null vs reject
limit, were addressed. No new tests added yet, as some details are
unclear...


[1] https://github.com/postgresql-cfbot/postgresql/compare/cf/4810~1...cf/4810#diff-98d8bfd706468f77f8b0...
--
Best regards,
Kirill Reshke


Attachments:

  [application/octet-stream] v6-0001-on_error-set_to_null.patch (13.4K, 2-v6-0001-on_error-set_to_null.patch)
  download | inline diff:
From 85a62beb248cf053dd0e05b01df7572dff7766db Mon Sep 17 00:00:00 2001
From: jian he <[email protected]>
Date: Thu, 12 Sep 2024 17:07:02 +0800
Subject: [PATCH v6] on_error set_to_null

extent "on_error action", introduce new option: on_error set_to_null.

Due to current grammar, we cannot use "on_error null",
so I choose on_error set_to_null.

any data type conversion errors while the COPY FROM process will set that column value to be NULL.
this will only work with COPY FROM and non-binary format.

However this will respect the not-null constraint, meaning, if you actually converted error to null,
but the column has not-null constraint, not-null constraint violation ERROR will be raised.

discussion: https://postgr.es/m/CAKFQuwawy1e6YR4S=j+y7pXqg_Dw1WBVrgvf=BP3d1_aSfe_+Q@mail.gmail.com
---
 doc/src/sgml/ref/copy.sgml           |  7 ++--
 src/backend/commands/copy.c          | 12 ++++---
 src/backend/commands/copyfrom.c      |  9 ++---
 src/backend/commands/copyfromparse.c | 11 ++++++
 src/bin/psql/tab-complete.in.c       |  2 +-
 src/include/commands/copy.h          |  1 +
 src/test/regress/expected/copy2.out  | 50 ++++++++++++++++++++++++++++
 src/test/regress/sql/copy2.sql       | 44 ++++++++++++++++++++++++
 8 files changed, 123 insertions(+), 13 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 8394402f096..dcbfa17a3ce 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -394,12 +394,13 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       Specifies how to behave when encountering an error converting a column's
       input value into its data type.
       An <replaceable class="parameter">error_action</replaceable> value of
-      <literal>stop</literal> means fail the command, while
-      <literal>ignore</literal> means discard the input row and continue with the next one.
+      <literal>stop</literal> means fail the command,
+      <literal>ignore</literal> means discard the input row and continue with the next one, and
+      <literal>set_to_null</literal> means replace columns containing erroneous input values with <literal>null</literal> and move to the next row.
       The default is <literal>stop</literal>.
      </para>
      <para>
-      The <literal>ignore</literal> option is applicable only for <command>COPY FROM</command>
+      The <literal>ignore</literal> and <literal>set_to_null</literal> options are applicable only for <command>COPY FROM</command>
       when the <literal>FORMAT</literal> is <literal>text</literal> or <literal>csv</literal>.
      </para>
      <para>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 3485ba8663f..304022cd867 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -403,12 +403,14 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
 				 parser_errposition(pstate, def->location)));
 
 	/*
-	 * Allow "stop", or "ignore" values.
+	 * Allow "stop", "ignore", "set_to_null" values.
 	 */
 	if (pg_strcasecmp(sval, "stop") == 0)
 		return COPY_ON_ERROR_STOP;
 	if (pg_strcasecmp(sval, "ignore") == 0)
 		return COPY_ON_ERROR_IGNORE;
+	if (pg_strcasecmp(sval, "set_to_null") == 0)
+		return COPY_ON_ERROR_NULL;
 
 	ereport(ERROR,
 			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -904,13 +906,13 @@ ProcessCopyOptions(ParseState *pstate,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("only ON_ERROR STOP is allowed in BINARY mode")));
 
-	if (opts_out->reject_limit && !opts_out->on_error)
+	if (opts_out->reject_limit && !(opts_out->on_error == COPY_ON_ERROR_NULL || opts_out->on_error == COPY_ON_ERROR_IGNORE))
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 		/*- translator: first and second %s are the names of COPY option, e.g.
-		 * ON_ERROR, third is the value of the COPY option, e.g. IGNORE */
-				 errmsg("COPY %s requires %s to be set to %s",
-						"REJECT_LIMIT", "ON_ERROR", "IGNORE")));
+		 * ON_ERROR, third is the value of the COPY option, e.g. IGNORE or SET_TO_NULL */
+				 errmsg("COPY %s requires %s to be set to %s or %s",
+						"REJECT_LIMIT", "ON_ERROR", "IGNORE", "SET_TO_NULL")));
 }
 
 /*
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 07cbd5d22b8..dde30989009 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1321,7 +1321,7 @@ CopyFrom(CopyFromState cstate)
 	/* Done, clean up */
 	error_context_stack = errcallback.previous;
 
-	if (cstate->opts.on_error != COPY_ON_ERROR_STOP &&
+	if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE &&
 		cstate->num_errors > 0 &&
 		cstate->opts.log_verbosity >= COPY_LOG_VERBOSITY_DEFAULT)
 		ereport(NOTICE,
@@ -1474,10 +1474,11 @@ BeginCopyFrom(ParseState *pstate,
 		cstate->escontext->error_occurred = false;
 
 		/*
-		 * Currently we only support COPY_ON_ERROR_IGNORE. We'll add other
-		 * options later
+		 * Currently we only support COPY_ON_ERROR_IGNORE, COPY_ON_ERROR_NULL.
+		 * We'll add other options later
 		 */
-		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
+		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE ||
+			cstate->opts.on_error == COPY_ON_ERROR_NULL)
 			cstate->escontext->details_wanted = false;
 	}
 	else
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 654fecb1b14..8ab42dbceee 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -961,6 +961,17 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 			{
 				Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
 
+				if (cstate->opts.on_error == COPY_ON_ERROR_NULL)
+				{
+					values[m] = (Datum) 0;
+					nulls[m] = true;
+					/*
+					 * set error_occurred to false, so next
+					 * InputFunctionCallSafe call behave sane.
+					*/
+					cstate->escontext->error_occurred = false;
+					continue;
+				}
 				cstate->num_errors++;
 
 				if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c
index 1be0056af73..33652b79f62 100644
--- a/src/bin/psql/tab-complete.in.c
+++ b/src/bin/psql/tab-complete.in.c
@@ -3235,7 +3235,7 @@ match_previous_words(int pattern_id,
 		COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL",
 					  "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE",
 					  "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT",
-					  "ON_ERROR", "LOG_VERBOSITY");
+					  "ON_ERROR", "SET_TO_NULL", "LOG_VERBOSITY");
 
 	/* Complete COPY <sth> FROM|TO filename WITH (FORMAT */
 	else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT"))
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 4002a7f5382..051ca12d107 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -38,6 +38,7 @@ typedef enum CopyOnErrorChoice
 {
 	COPY_ON_ERROR_STOP = 0,		/* immediately throw errors, default */
 	COPY_ON_ERROR_IGNORE,		/* ignore errors */
+	COPY_ON_ERROR_NULL,			/* set error field to null */
 } CopyOnErrorChoice;
 
 /*
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 64ea33aeae8..55ece18f9bf 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -81,6 +81,10 @@ COPY x from stdin (on_error ignore, on_error ignore);
 ERROR:  conflicting or redundant options
 LINE 1: COPY x from stdin (on_error ignore, on_error ignore);
                                             ^
+COPY x from stdin (on_error set_to_null, on_error set_to_null);
+ERROR:  conflicting or redundant options
+LINE 1: COPY x from stdin (on_error set_to_null, on_error set_to_nul...
+                                                 ^
 COPY x from stdin (log_verbosity default, log_verbosity verbose);
 ERROR:  conflicting or redundant options
 LINE 1: COPY x from stdin (log_verbosity default, log_verbosity verb...
@@ -92,6 +96,8 @@ COPY x from stdin (format BINARY, null 'x');
 ERROR:  cannot specify NULL in BINARY mode
 COPY x from stdin (format BINARY, on_error ignore);
 ERROR:  only ON_ERROR STOP is allowed in BINARY mode
+COPY x from stdin (format BINARY, on_error set_to_null);
+ERROR:  only ON_ERROR STOP is allowed in BINARY mode
 COPY x from stdin (on_error unsupported);
 ERROR:  COPY ON_ERROR "unsupported" not recognized
 LINE 1: COPY x from stdin (on_error unsupported);
@@ -124,6 +130,10 @@ COPY x to stdout (format BINARY, on_error unsupported);
 ERROR:  COPY ON_ERROR cannot be used with COPY TO
 LINE 1: COPY x to stdout (format BINARY, on_error unsupported);
                                          ^
+COPY x to stdin (on_error set_to_null);
+ERROR:  COPY ON_ERROR cannot be used with COPY TO
+LINE 1: COPY x to stdin (on_error set_to_null);
+                         ^
 COPY x from stdin (log_verbosity unsupported);
 ERROR:  COPY LOG_VERBOSITY "unsupported" not recognized
 LINE 1: COPY x from stdin (log_verbosity unsupported);
@@ -769,6 +779,24 @@ CONTEXT:  COPY check_ign_err
 NOTICE:  skipping row due to data type incompatibility at line 8 for column "k": "a"
 CONTEXT:  COPY check_ign_err
 NOTICE:  6 rows were skipped due to data type incompatibility
+create domain d_int_not_null as int not null check(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, c int not null, b int);
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+ERROR:  null value in column "c" of relation "t_on_error_null" violates not-null constraint
+DETAIL:  Failing row contains (11, null, 12).
+CONTEXT:  COPY t_on_error_null, line 1: "11	a	12"
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+select * from t_on_error_null;
+ a | c  | b  
+---+----+----
+   | 11 | 13
+   | 11 | 14
+(2 rows)
+
+drop table t_on_error_null;
+drop domain d_int_not_null;
 -- tests for on_error option with log_verbosity and null constraint via domain
 CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
 CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -813,6 +841,28 @@ ERROR:  skipped more than REJECT_LIMIT (3) rows due to data type incompatibility
 CONTEXT:  COPY check_ign_err, line 5, column n: ""
 COPY check_ign_err FROM STDIN WITH (on_error ignore, reject_limit 4);
 NOTICE:  4 rows were skipped due to data type incompatibility
+-- tests for on_error set_to_null option
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+\pset null NULL
+SELECT * FROM check_ign_err;
+  n   |  m  |  k   
+------+-----+------
+    1 | {1} | NULL
+    2 | {2} |    1
+    3 | {3} |    2
+    4 | {4} | NULL
+ NULL | {5} | NULL
+(5 rows)
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  missing data for column "k"
+CONTEXT:  COPY check_ign_err, line 1, column m: ""
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  extra data after last expected column
+CONTEXT:  COPY check_ign_err, line 1: "1,{1},1,1"
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 45273557ce0..8d974f4d9a9 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -67,12 +67,14 @@ COPY x from stdin (force_null (a), force_null (b));
 COPY x from stdin (convert_selectively (a), convert_selectively (b));
 COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii');
 COPY x from stdin (on_error ignore, on_error ignore);
+COPY x from stdin (on_error set_to_null, on_error set_to_null);
 COPY x from stdin (log_verbosity default, log_verbosity verbose);
 
 -- incorrect options
 COPY x from stdin (format BINARY, delimiter ',');
 COPY x from stdin (format BINARY, null 'x');
 COPY x from stdin (format BINARY, on_error ignore);
+COPY x from stdin (format BINARY, on_error set_to_null);
 COPY x from stdin (on_error unsupported);
 COPY x from stdin (format TEXT, force_quote(a));
 COPY x from stdin (format TEXT, force_quote *);
@@ -87,6 +89,7 @@ COPY x from stdin (format TEXT, force_null *);
 COPY x to stdout (format CSV, force_null(a));
 COPY x to stdout (format CSV, force_null *);
 COPY x to stdout (format BINARY, on_error unsupported);
+COPY x to stdin (on_error set_to_null);
 COPY x from stdin (log_verbosity unsupported);
 COPY x from stdin with (reject_limit 1);
 COPY x from stdin with (on_error ignore, reject_limit 0);
@@ -534,6 +537,24 @@ a	{2}	2
 8	{8}	8
 \.
 
+create domain d_int_not_null as int not null check(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, c int not null, b int);
+
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+11	a	12
+\.
+
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+-1	11	13
+a	11	14
+\.
+
+select * from t_on_error_null;
+drop table t_on_error_null;
+drop domain d_int_not_null;
+
 -- tests for on_error option with log_verbosity and null constraint via domain
 CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
 CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -588,6 +609,29 @@ a	{7}	7
 10	{10}	10
 \.
 
+-- tests for on_error set_to_null option
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+1	{1}	a
+2	{2}	1
+3	{3}	2
+4	{4}	b
+a	{5}	c
+\.
+
+\pset null NULL
+SELECT * FROM check_ign_err;
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,
+\.
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,{1},1,1
+\.
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
-- 
2.34.1



^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-10-27 11:32  jian he <[email protected]>
  parent: Fujii Masao <[email protected]>
  1 sibling, 0 replies; 29+ messages in thread

From: jian he @ 2024-10-27 11:32 UTC (permalink / raw)
  To: Fujii Masao <[email protected]>; +Cc: Kirill Reshke <[email protected]>; Jim Jones <[email protected]>; David G. Johnston <[email protected]>; Yugo NAGATA <[email protected]>; torikoshia <[email protected]>; PostgreSQL Hackers <[email protected]>

On Mon, Oct 21, 2024 at 8:39 PM Fujii Masao <[email protected]> wrote:
> On 2024/10/21 18:30, Kirill Reshke wrote:
> > v4 no longer applies. It now conflicts with
> > e7834a1a251d4a28245377f383ff20a657ba8262.
> > Also, there were review comments.
> >
> > So, I decided to rebase.
>
> Thanks for the patch! Here are my review comments:
>
> I noticed that on_error=set_to_null does not trigger NOTICE messages for rows
> and columns with errors. It's "unexpected" thing for columns to be silently
> replaced with NULL due to on_error=set_to_null. So, similar to on_error=ignore,
> there should be NOTICE messages indicating which input records had columns
> set to NULL because of data type incompatibility. Without these messages,
> users might not realize that some columns were set to NULL.
>

on_error=set_to_null,
we have two options for CopyFromStateData->num_errors.
A. Counting the number of rows that on_error set_to_null happened.
B. Counting number of times that on_error set_to_null happened

let's say optionA:
        ereport(NOTICE,
                errmsg_plural("%llu row was converted to NULL due to
data type incompatibility",
                              "%llu rows were converted to NULL due to
data type incompatibility",
                              (unsigned long long) cstate->num_errors,
                              (unsigned long long) cstate->num_errors));

I doubt the above message is accurate.
"%llu row was converted to NULL"
can mean "%llu row, for each row, all columns was converted to NULL"
but here we are
"%llu row, for each row, some column (can be all columns) was converted to NULL"


optionB: the message can be:
errmsg_plural("converted to NULL due to data type incompatibility
happened %llu time")
but I aslo feel the wording is not perfect also.

So overall I am not sure how to construct the NOTICE messages.






^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-11-07 18:00  Fujii Masao <[email protected]>
  parent: Kirill Reshke <[email protected]>
  0 siblings, 1 reply; 29+ messages in thread

From: Fujii Masao @ 2024-11-07 18:00 UTC (permalink / raw)
  To: Kirill Reshke <[email protected]>; +Cc: jian he <[email protected]>; Jim Jones <[email protected]>; David G. Johnston <[email protected]>; Yugo NAGATA <[email protected]>; torikoshia <[email protected]>; PostgreSQL Hackers <[email protected]>



On 2024/10/26 6:03, Kirill Reshke wrote:
> when the REJECT LIMIT is set to some non-zero number and the number of
> row NULL replacements exceeds the limit, is it OK to fail. Because
> there WAS errors, and we should not tolerate more than $limit errors .
> I do find this behavior to be consistent.

+1


> But what if we don't set a REJECT LIMIT, it is sane to do all
> replacements, as if REJECT LIMIT is inf.

+1


> But our REJECT LIMIT is zero
> (not set).
> So, we ignore zero REJECT LIMIT if set_to_null is set.

REJECT_LIMIT currently has to be greater than zero, so it won’t ever be zero.


> But while I was trying to implement that, I realized that I don't
> understand v4 of this patch. My misunderstanding is about
> `t_on_error_null` tests. We are allowed to insert a NULL value for the
> first column of t_on_error_null using COPY ON_ERROR SET_TO_NULL. Why
> do we do that? My thought is we should try to execute
> InputFunctionCallSafe with NULL value  (i mean, here [1]) for the
> column after we failed to insert the input value. And, if this second
> call is successful, we do replacement, otherwise we count the row as
> erroneous.

Your concern is valid. Allowing NULL to be stored in a column with a NOT NULL
constraint via COPY ON_ERROR=SET_TO_NULL does seem unexpected. As you suggested,
NULL values set by SET_TO_NULL should probably be re-evaluated.


> Hm, good catch. Applied almost as you suggested. I did tweak this
> "replace columns with invalid input values with " into "replace
> columns containing erroneous input values with". Is that OK?

Yes, sounds good.

Regards,

-- 
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION







^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-11-09 12:55  Kirill Reshke <[email protected]>
  parent: Fujii Masao <[email protected]>
  0 siblings, 2 replies; 29+ messages in thread

From: Kirill Reshke @ 2024-11-09 12:55 UTC (permalink / raw)
  To: Fujii Masao <[email protected]>; +Cc: jian he <[email protected]>; Jim Jones <[email protected]>; David G. Johnston <[email protected]>; Yugo NAGATA <[email protected]>; torikoshia <[email protected]>; PostgreSQL Hackers <[email protected]>

On Thu, 7 Nov 2024 at 23:00, Fujii Masao <[email protected]> wrote:
>
>
>
> On 2024/10/26 6:03, Kirill Reshke wrote:
> > when the REJECT LIMIT is set to some non-zero number and the number of
> > row NULL replacements exceeds the limit, is it OK to fail. Because
> > there WAS errors, and we should not tolerate more than $limit errors .
> > I do find this behavior to be consistent.
>
> +1
>
>
> > But what if we don't set a REJECT LIMIT, it is sane to do all
> > replacements, as if REJECT LIMIT is inf.
>
> +1

After thinking for a while, I'm now more opposed to this approach. I
think we should count rows with erroneous data as errors only if
null substitution for these rows failed, not the total number of rows
which were modified.
Then, to respect the REJECT LIMIT option, we compare this number with
the limit. This is actually simpler approach IMHO. What do You think?

> > But while I was trying to implement that, I realized that I don't
> > understand v4 of this patch. My misunderstanding is about
> > `t_on_error_null` tests. We are allowed to insert a NULL value for the
> > first column of t_on_error_null using COPY ON_ERROR SET_TO_NULL. Why
> > do we do that? My thought is we should try to execute
> > InputFunctionCallSafe with NULL value  (i mean, here [1]) for the
> > column after we failed to insert the input value. And, if this second
> > call is successful, we do replacement, otherwise we count the row as
> > erroneous.
>
> Your concern is valid. Allowing NULL to be stored in a column with a NOT NULL
> constraint via COPY ON_ERROR=SET_TO_NULL does seem unexpected. As you suggested,
> NULL values set by SET_TO_NULL should probably be re-evaluated.

Thank you. I updated the patch with a NULL re-evaluation.


PFA v7. I did not yet update the doc for this patch version, waiting
for feedback about REJECT LIMIT + SET_TO_NULL behaviour.

Best regards,
Kirill Reshke


Attachments:

  [application/octet-stream] v7-0001-Incrtoduce-COPY-option-to-replace-columns-contain.patch (20.8K, 2-v7-0001-Incrtoduce-COPY-option-to-replace-columns-contain.patch)
  download | inline diff:
From 0e04606ed5c76f0fe079bcb157194ab06f7272aa Mon Sep 17 00:00:00 2001
From: jian he <[email protected]>
Date: Thu, 12 Sep 2024 17:07:02 +0800
Subject: [PATCH v7] Incrtoduce COPY option to replace columns containing
 erroneous data with null

extent "on_error action", introduce new option: on_error set_to_null.

Due to current grammar, we cannot use "on_error null",
so I choose on_error set_to_null.

any data type conversion errors while the COPY FROM process will set that column value to be NULL.
this will only work with COPY FROM and non-binary format.

However this will respect the not-null constraint, meaning, if you actually converted error to null,
but the column has not-null constraint, not-null constraint violation ERROR will be raised.

discussion: https://postgr.es/m/CAKFQuwawy1e6YR4S=j+y7pXqg_Dw1WBVrgvf=BP3d1_aSfe_+Q@mail.gmail.com
---
 doc/src/sgml/ref/copy.sgml           |  7 ++-
 src/backend/commands/copy.c          | 12 ++--
 src/backend/commands/copyfrom.c      | 40 ++++++++----
 src/backend/commands/copyfromparse.c | 46 ++++++++++++++
 src/bin/psql/tab-complete.in.c       |  2 +-
 src/include/commands/copy.h          |  1 +
 src/test/regress/expected/copy2.out  | 91 +++++++++++++++++++++++++++-
 src/test/regress/sql/copy2.sql       | 78 ++++++++++++++++++++++++
 8 files changed, 255 insertions(+), 22 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 8394402f096..dcbfa17a3ce 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -394,12 +394,13 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       Specifies how to behave when encountering an error converting a column's
       input value into its data type.
       An <replaceable class="parameter">error_action</replaceable> value of
-      <literal>stop</literal> means fail the command, while
-      <literal>ignore</literal> means discard the input row and continue with the next one.
+      <literal>stop</literal> means fail the command,
+      <literal>ignore</literal> means discard the input row and continue with the next one, and
+      <literal>set_to_null</literal> means replace columns containing erroneous input values with <literal>null</literal> and move to the next row.
       The default is <literal>stop</literal>.
      </para>
      <para>
-      The <literal>ignore</literal> option is applicable only for <command>COPY FROM</command>
+      The <literal>ignore</literal> and <literal>set_to_null</literal> options are applicable only for <command>COPY FROM</command>
       when the <literal>FORMAT</literal> is <literal>text</literal> or <literal>csv</literal>.
      </para>
      <para>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 3485ba8663f..304022cd867 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -403,12 +403,14 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
 				 parser_errposition(pstate, def->location)));
 
 	/*
-	 * Allow "stop", or "ignore" values.
+	 * Allow "stop", "ignore", "set_to_null" values.
 	 */
 	if (pg_strcasecmp(sval, "stop") == 0)
 		return COPY_ON_ERROR_STOP;
 	if (pg_strcasecmp(sval, "ignore") == 0)
 		return COPY_ON_ERROR_IGNORE;
+	if (pg_strcasecmp(sval, "set_to_null") == 0)
+		return COPY_ON_ERROR_NULL;
 
 	ereport(ERROR,
 			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -904,13 +906,13 @@ ProcessCopyOptions(ParseState *pstate,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("only ON_ERROR STOP is allowed in BINARY mode")));
 
-	if (opts_out->reject_limit && !opts_out->on_error)
+	if (opts_out->reject_limit && !(opts_out->on_error == COPY_ON_ERROR_NULL || opts_out->on_error == COPY_ON_ERROR_IGNORE))
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 		/*- translator: first and second %s are the names of COPY option, e.g.
-		 * ON_ERROR, third is the value of the COPY option, e.g. IGNORE */
-				 errmsg("COPY %s requires %s to be set to %s",
-						"REJECT_LIMIT", "ON_ERROR", "IGNORE")));
+		 * ON_ERROR, third is the value of the COPY option, e.g. IGNORE or SET_TO_NULL */
+				 errmsg("COPY %s requires %s to be set to %s or %s",
+						"REJECT_LIMIT", "ON_ERROR", "IGNORE", "SET_TO_NULL")));
 }
 
 /*
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 07cbd5d22b8..5fc9d83270b 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1003,7 +1003,7 @@ CopyFrom(CopyFromState cstate)
 		if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
 			break;
 
-		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE &&
+		if ((cstate->opts.on_error == COPY_ON_ERROR_IGNORE || cstate->opts.on_error == COPY_ON_ERROR_NULL) &&
 			cstate->escontext->error_occurred)
 		{
 			/*
@@ -1018,12 +1018,29 @@ CopyFrom(CopyFromState cstate)
 			pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED,
 										 cstate->num_errors);
 
-			if (cstate->opts.reject_limit > 0 && \
-				cstate->num_errors > cstate->opts.reject_limit)
-				ereport(ERROR,
-						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-						 errmsg("skipped more than REJECT_LIMIT (%lld) rows due to data type incompatibility",
-								(long long) cstate->opts.reject_limit)));
+			if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
+			{
+				if (cstate->opts.reject_limit > 0 && cstate->num_errors > cstate->opts.reject_limit)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							errmsg("skipped more than REJECT_LIMIT (%lld) rows due to data type incompatibility",
+									(long long) cstate->opts.reject_limit)));
+			}
+			else
+			{
+				/* Provide different error msg if reject_limit is zero */
+				if (cstate->opts.reject_limit == 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							errmsg("failed to replace column containing erroneous data with null",
+									(long long) cstate->opts.reject_limit),
+							errhint("Consider specifying the REJECT LIMIT option to skip erroneous rows.")));
+				else if (cstate->num_errors > cstate->opts.reject_limit)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							errmsg("encountered more than REJECT_LIMIT (%lld) rows with data type incompatibility",
+									(long long) cstate->opts.reject_limit)));
+			}
 
 			/* Repeat NextCopyFrom() until no soft error occurs */
 			continue;
@@ -1321,7 +1338,7 @@ CopyFrom(CopyFromState cstate)
 	/* Done, clean up */
 	error_context_stack = errcallback.previous;
 
-	if (cstate->opts.on_error != COPY_ON_ERROR_STOP &&
+	if ((cstate->opts.on_error == COPY_ON_ERROR_IGNORE || cstate->opts.on_error == COPY_ON_ERROR_NULL) &&
 		cstate->num_errors > 0 &&
 		cstate->opts.log_verbosity >= COPY_LOG_VERBOSITY_DEFAULT)
 		ereport(NOTICE,
@@ -1474,10 +1491,11 @@ BeginCopyFrom(ParseState *pstate,
 		cstate->escontext->error_occurred = false;
 
 		/*
-		 * Currently we only support COPY_ON_ERROR_IGNORE. We'll add other
-		 * options later
+		 * Currently we only support COPY_ON_ERROR_IGNORE, COPY_ON_ERROR_NULL.
+		 * We'll add other options later
 		 */
-		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
+		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE ||
+			cstate->opts.on_error == COPY_ON_ERROR_NULL)
 			cstate->escontext->details_wanted = false;
 	}
 	else
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index d1d43b53d83..1511b44f4ba 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -960,6 +960,52 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 			{
 				Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
 
+				/* 
+				* We encountered an error while parsing one of attributes.
+				*/
+				if (cstate->opts.on_error == COPY_ON_ERROR_NULL && string != NULL)
+				{
+					/*
+					* Temporary unset error_occurred.
+					* If null substitution for this attribute will
+					* succeed, we do not count this row as erroneous
+					*/
+					cstate->escontext->error_occurred = false;
+					
+					if (InputFunctionCallSafe(&in_functions[m],
+											NULL,
+											typioparams[m],
+											att->atttypmod,
+											(Node *) cstate->escontext,
+											&values[m]))
+					{
+						/* If datatype if okay with NULL, replace 
+						* with null
+						*/
+						nulls[m] = true;
+
+						if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
+						{
+						
+							ereport(NOTICE,
+									errmsg("replaced row attribute \"%s\" with NULL due to data type incompatibility at line %llu.",
+										cstate->cur_attname, (unsigned long long) cstate->cur_lineno));
+						}
+						continue;
+					}
+
+					if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
+					{
+						ereport(NOTICE,
+								errmsg("failed to replace row attribute \"%s\" with NULL at line %llu.",
+									   cstate->cur_attname, (unsigned long long)cstate->cur_lineno));
+					}
+				}
+
+				/* 
+				* Update copy state counter for number of erroneous rows
+				* as we are going to return from function.
+				*/
 				cstate->num_errors++;
 
 				if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c
index fad2277991d..c2902ffc339 100644
--- a/src/bin/psql/tab-complete.in.c
+++ b/src/bin/psql/tab-complete.in.c
@@ -3235,7 +3235,7 @@ match_previous_words(int pattern_id,
 		COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL",
 					  "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE",
 					  "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT",
-					  "ON_ERROR", "LOG_VERBOSITY");
+					  "ON_ERROR", "SET_TO_NULL", "LOG_VERBOSITY");
 
 	/* Complete COPY <sth> FROM|TO filename WITH (FORMAT */
 	else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT"))
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 4002a7f5382..051ca12d107 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -38,6 +38,7 @@ typedef enum CopyOnErrorChoice
 {
 	COPY_ON_ERROR_STOP = 0,		/* immediately throw errors, default */
 	COPY_ON_ERROR_IGNORE,		/* ignore errors */
+	COPY_ON_ERROR_NULL,			/* set error field to null */
 } CopyOnErrorChoice;
 
 /*
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 64ea33aeae8..22605a0ae45 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -81,6 +81,10 @@ COPY x from stdin (on_error ignore, on_error ignore);
 ERROR:  conflicting or redundant options
 LINE 1: COPY x from stdin (on_error ignore, on_error ignore);
                                             ^
+COPY x from stdin (on_error set_to_null, on_error set_to_null);
+ERROR:  conflicting or redundant options
+LINE 1: COPY x from stdin (on_error set_to_null, on_error set_to_nul...
+                                                 ^
 COPY x from stdin (log_verbosity default, log_verbosity verbose);
 ERROR:  conflicting or redundant options
 LINE 1: COPY x from stdin (log_verbosity default, log_verbosity verb...
@@ -92,6 +96,8 @@ COPY x from stdin (format BINARY, null 'x');
 ERROR:  cannot specify NULL in BINARY mode
 COPY x from stdin (format BINARY, on_error ignore);
 ERROR:  only ON_ERROR STOP is allowed in BINARY mode
+COPY x from stdin (format BINARY, on_error set_to_null);
+ERROR:  only ON_ERROR STOP is allowed in BINARY mode
 COPY x from stdin (on_error unsupported);
 ERROR:  COPY ON_ERROR "unsupported" not recognized
 LINE 1: COPY x from stdin (on_error unsupported);
@@ -124,12 +130,16 @@ COPY x to stdout (format BINARY, on_error unsupported);
 ERROR:  COPY ON_ERROR cannot be used with COPY TO
 LINE 1: COPY x to stdout (format BINARY, on_error unsupported);
                                          ^
+COPY x to stdin (on_error set_to_null);
+ERROR:  COPY ON_ERROR cannot be used with COPY TO
+LINE 1: COPY x to stdin (on_error set_to_null);
+                         ^
 COPY x from stdin (log_verbosity unsupported);
 ERROR:  COPY LOG_VERBOSITY "unsupported" not recognized
 LINE 1: COPY x from stdin (log_verbosity unsupported);
                            ^
 COPY x from stdin with (reject_limit 1);
-ERROR:  COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
+ERROR:  COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE or SET_TO_NULL
 COPY x from stdin with (on_error ignore, reject_limit 0);
 ERROR:  REJECT_LIMIT (0) must be greater than zero
 -- too many columns in column list: should fail
@@ -769,6 +779,47 @@ CONTEXT:  COPY check_ign_err
 NOTICE:  skipping row due to data type incompatibility at line 8 for column "k": "a"
 CONTEXT:  COPY check_ign_err
 NOTICE:  6 rows were skipped due to data type incompatibility
+CREATE DOMAIN d_int_not_null AS INT NOT NULL CHECK(value > 0);
+CREATE DOMAIN d_int_positive_maybe_null AS INT CHECK(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, b d_int_positive_maybe_null, c INT);
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+-- check inserted content
+TABLE t_on_error_null;
+ a  | b  | c  
+----+----+----
+ 11 |    | 12
+  1 | 11 |   
+(2 rows)
+
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+ERROR:  failed to replace column containing erroneous data with null
+HINT:  Consider specifying the REJECT LIMIT option to skip erroneous rows.
+CONTEXT:  COPY t_on_error_null, line 1, column a: "a"
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+ERROR:  failed to replace column containing erroneous data with null
+HINT:  Consider specifying the REJECT LIMIT option to skip erroneous rows.
+CONTEXT:  COPY t_on_error_null, line 1, column a: "-1"
+--ok. Check interaction with REJECT LIMIT
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null, reject_limit 2);
+NOTICE:  2 rows were skipped due to data type incompatibility
+-- check inserted content
+TABLE t_on_error_null;
+ a  | b  | c  
+----+----+----
+ 11 |    | 12
+  1 | 11 |   
+  1 | 11 | 14
+(3 rows)
+
+--fail. Check interaction with REJECT LIMIT
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null, reject_limit 2);
+ERROR:  encountered more than REJECT_LIMIT (2) rows with data type incompatibility
+CONTEXT:  COPY t_on_error_null, line 3, column a: null input
 -- tests for on_error option with log_verbosity and null constraint via domain
 CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
 CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -776,6 +827,17 @@ COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose);
 NOTICE:  skipping row due to data type incompatibility at line 2 for column "l": null input
 CONTEXT:  COPY check_ign_err2
 NOTICE:  1 row was skipped due to data type incompatibility
+-- check null substitution massages.
+COPY check_ign_err2 FROM STDIN WITH (on_error set_to_null, reject_limit 2, log_verbosity verbose);
+NOTICE:  replaced row attribute "k" with NULL due to data type incompatibility at line 1.
+CONTEXT:  COPY check_ign_err2, line 1, column k: "foo"
+NOTICE:  skipping row due to data type incompatibility at line 2 for column "l": null input
+CONTEXT:  COPY check_ign_err2
+NOTICE:  failed to replace row attribute "l" with NULL at line 3.
+CONTEXT:  COPY check_ign_err2, line 3, column l: "'foooooooooooooooo'"
+NOTICE:  skipping row due to data type incompatibility at line 3 for column "l": "'foooooooooooooooo'"
+CONTEXT:  COPY check_ign_err2
+NOTICE:  2 rows were skipped due to data type incompatibility
 COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity silent);
 -- reset context choice
 \set SHOW_CONTEXT errors
@@ -791,8 +853,9 @@ SELECT * FROM check_ign_err2;
  n |  m  | k |   l   
 ---+-----+---+-------
  1 | {1} | 1 | 'foo'
+ 1 | {1} |   | 'foo'
  3 | {3} | 3 | 'bar'
-(2 rows)
+(3 rows)
 
 -- test datatype error that can't be handled as soft: should fail
 CREATE TABLE hard_err(foo widget);
@@ -813,6 +876,28 @@ ERROR:  skipped more than REJECT_LIMIT (3) rows due to data type incompatibility
 CONTEXT:  COPY check_ign_err, line 5, column n: ""
 COPY check_ign_err FROM STDIN WITH (on_error ignore, reject_limit 4);
 NOTICE:  4 rows were skipped due to data type incompatibility
+-- tests for on_error set_to_null option
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+\pset null NULL
+SELECT * FROM check_ign_err;
+  n   |  m  |  k   
+------+-----+------
+    1 | {1} | NULL
+    2 | {2} |    1
+    3 | {3} |    2
+    4 | {4} | NULL
+ NULL | {5} | NULL
+(5 rows)
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  missing data for column "k"
+CONTEXT:  COPY check_ign_err, line 1, column m: ""
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  extra data after last expected column
+CONTEXT:  COPY check_ign_err, line 1: "1,{1},1,1"
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
@@ -828,6 +913,8 @@ DROP VIEW instead_of_insert_tbl_view;
 DROP VIEW instead_of_insert_tbl_view_2;
 DROP FUNCTION fun_instead_of_insert_tbl();
 DROP TABLE check_ign_err;
+DROP TABLE t_on_error_null;
+DROP DOMAIN d_int_not_null;
 DROP TABLE check_ign_err2;
 DROP DOMAIN dcheck_ign_err2;
 DROP TABLE hard_err;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 45273557ce0..f2527d3aeaa 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -67,12 +67,14 @@ COPY x from stdin (force_null (a), force_null (b));
 COPY x from stdin (convert_selectively (a), convert_selectively (b));
 COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii');
 COPY x from stdin (on_error ignore, on_error ignore);
+COPY x from stdin (on_error set_to_null, on_error set_to_null);
 COPY x from stdin (log_verbosity default, log_verbosity verbose);
 
 -- incorrect options
 COPY x from stdin (format BINARY, delimiter ',');
 COPY x from stdin (format BINARY, null 'x');
 COPY x from stdin (format BINARY, on_error ignore);
+COPY x from stdin (format BINARY, on_error set_to_null);
 COPY x from stdin (on_error unsupported);
 COPY x from stdin (format TEXT, force_quote(a));
 COPY x from stdin (format TEXT, force_quote *);
@@ -87,6 +89,7 @@ COPY x from stdin (format TEXT, force_null *);
 COPY x to stdout (format CSV, force_null(a));
 COPY x to stdout (format CSV, force_null *);
 COPY x to stdout (format BINARY, on_error unsupported);
+COPY x to stdin (on_error set_to_null);
 COPY x from stdin (log_verbosity unsupported);
 COPY x from stdin with (reject_limit 1);
 COPY x from stdin with (on_error ignore, reject_limit 0);
@@ -534,6 +537,50 @@ a	{2}	2
 8	{8}	8
 \.
 
+CREATE DOMAIN d_int_not_null AS INT NOT NULL CHECK(value > 0);
+CREATE DOMAIN d_int_positive_maybe_null AS INT CHECK(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, b d_int_positive_maybe_null, c INT);
+
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+11	a	12
+\.
+
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+1	11	d
+\.
+
+-- check inserted content
+TABLE t_on_error_null;
+
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+a	11	14
+\.
+
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+-1	11	13
+\.
+
+--ok. Check interaction with REJECT LIMIT
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null, reject_limit 2);
+-1	11	13
+a	11	14
+1	11	14
+\.
+
+-- check inserted content
+TABLE t_on_error_null;
+
+--fail. Check interaction with REJECT LIMIT
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null, reject_limit 2);
+-1	11	13
+a	11	14
+\N	11	14
+\.
+
 -- tests for on_error option with log_verbosity and null constraint via domain
 CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
 CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -541,6 +588,12 @@ COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose);
 1	{1}	1	'foo'
 2	{2}	2	\N
 \.
+-- check null substitution massages.
+COPY check_ign_err2 FROM STDIN WITH (on_error set_to_null, reject_limit 2, log_verbosity verbose);
+1	{1}	foo	'foo'
+2	{2}	2	\N
+2	{2}	2	'foooooooooooooooo'
+\.
 COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity silent);
 3	{3}	3	'bar'
 4	{4}	4	\N
@@ -588,6 +641,29 @@ a	{7}	7
 10	{10}	10
 \.
 
+-- tests for on_error set_to_null option
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+1	{1}	a
+2	{2}	1
+3	{3}	2
+4	{4}	b
+a	{5}	c
+\.
+
+\pset null NULL
+SELECT * FROM check_ign_err;
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,
+\.
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,{1},1,1
+\.
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
@@ -603,6 +679,8 @@ DROP VIEW instead_of_insert_tbl_view;
 DROP VIEW instead_of_insert_tbl_view_2;
 DROP FUNCTION fun_instead_of_insert_tbl();
 DROP TABLE check_ign_err;
+DROP TABLE t_on_error_null;
+DROP DOMAIN d_int_not_null;
 DROP TABLE check_ign_err2;
 DROP DOMAIN dcheck_ign_err2;
 DROP TABLE hard_err;
-- 
2.34.1



^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-11-11 11:11  torikoshia <[email protected]>
  parent: Kirill Reshke <[email protected]>
  1 sibling, 1 reply; 29+ messages in thread

From: torikoshia @ 2024-11-11 11:11 UTC (permalink / raw)
  To: Kirill Reshke <[email protected]>; +Cc: Fujii Masao <[email protected]>; jian he <[email protected]>; Jim Jones <[email protected]>; David G. Johnston <[email protected]>; Yugo NAGATA <[email protected]>; PostgreSQL Hackers <[email protected]>

On 2024-11-09 21:55, Kirill Reshke wrote:

Thanks for working on this!

> On Thu, 7 Nov 2024 at 23:00, Fujii Masao <[email protected]> 
> wrote:
>> 
>> 
>> 
>> On 2024/10/26 6:03, Kirill Reshke wrote:
>> > when the REJECT LIMIT is set to some non-zero number and the number of
>> > row NULL replacements exceeds the limit, is it OK to fail. Because
>> > there WAS errors, and we should not tolerate more than $limit errors .
>> > I do find this behavior to be consistent.
>> 
>> +1
>> 
>> 
>> > But what if we don't set a REJECT LIMIT, it is sane to do all
>> > replacements, as if REJECT LIMIT is inf.
>> 
>> +1
> 
> After thinking for a while, I'm now more opposed to this approach. I
> think we should count rows with erroneous data as errors only if
> null substitution for these rows failed, not the total number of rows
> which were modified.
> Then, to respect the REJECT LIMIT option, we compare this number with
> the limit. This is actually simpler approach IMHO. What do You think?

IMHO I prefer the previous interpretation.
I'm not sure this is what people expect, but I assume that REJECT_LIMIT 
is used to specify how many malformed rows are acceptable in the 
"original" data source.

>> > But while I was trying to implement that, I realized that I don't
>> > understand v4 of this patch. My misunderstanding is about
>> > `t_on_error_null` tests. We are allowed to insert a NULL value for the
>> > first column of t_on_error_null using COPY ON_ERROR SET_TO_NULL. Why
>> > do we do that? My thought is we should try to execute
>> > InputFunctionCallSafe with NULL value  (i mean, here [1]) for the
>> > column after we failed to insert the input value. And, if this second
>> > call is successful, we do replacement, otherwise we count the row as
>> > erroneous.
>> 
>> Your concern is valid. Allowing NULL to be stored in a column with a 
>> NOT NULL
>> constraint via COPY ON_ERROR=SET_TO_NULL does seem unexpected. As you 
>> suggested,
>> NULL values set by SET_TO_NULL should probably be re-evaluated.
> 
> Thank you. I updated the patch with a NULL re-evaluation.
> 
> 
> PFA v7. I did not yet update the doc for this patch version, waiting
> for feedback about REJECT LIMIT + SET_TO_NULL behaviour.
> 


There were warnings when I applied the patch:

$ git apply 
v7-0001-Incrtoduce-COPY-option-to-replace-columns-contain.patch
v7-0001-Incrtoduce-COPY-option-to-replace-columns-contain.patch:170: 
trailing whitespace.
                    /*
v7-0001-Incrtoduce-COPY-option-to-replace-columns-contain.patch:181: 
trailing whitespace.

v7-0001-Incrtoduce-COPY-option-to-replace-columns-contain.patch:189: 
trailing whitespace.
                                    /* If datatype if okay with NULL, 
replace
v7-0001-Incrtoduce-COPY-option-to-replace-columns-contain.patch:196: 
trailing whitespace.

v7-0001-Incrtoduce-COPY-option-to-replace-columns-contain.patch:212: 
trailing whitespace.
                                 /*

>  @@ -403,12 +403,14 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState 
> *pstate, bool is_from)
>                   parser_errposition(pstate, def->location)));
...
> 
>  -   if (opts_out->reject_limit && !opts_out->on_error)
>  +   if (opts_out->reject_limit && !(opts_out->on_error == 
> COPY_ON_ERROR_NULL || opts_out->on_error == COPY_ON_ERROR_IGNORE))

Hmm, is this change necessary?
Personally, I feel the previous code is easier to read.


"REJECT LIMIT" should be "REJECT_LIMIT"?
> 1037                             errhint("Consider specifying the 
> REJECT LIMIT option to skip erroneous rows.")));


SET_TO_NULL is one of the value for ON_ERROR, but the patch treats 
SET_TO_NULL as option for COPY:

221 --- a/src/bin/psql/tab-complete.in.c
222 +++ b/src/bin/psql/tab-complete.in.c
223 @@ -3235,7 +3235,7 @@ match_previous_words(int pattern_id,
224         COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL",
225                       "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE",
226                       "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", 
"DEFAULT",
227 -                     "ON_ERROR", "LOG_VERBOSITY");
228 +                     "ON_ERROR", "SET_TO_NULL", "LOG_VERBOSITY");


> Best regards,
> Kirill Reshke

-- 
Regards,

--
Atsushi Torikoshi
Seconded from NTT DATA GROUP CORPORATION to SRA OSS K.K.






^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-11-11 20:27  Kirill Reshke <[email protected]>
  parent: torikoshia <[email protected]>
  0 siblings, 1 reply; 29+ messages in thread

From: Kirill Reshke @ 2024-11-11 20:27 UTC (permalink / raw)
  To: torikoshia <[email protected]>; +Cc: Fujii Masao <[email protected]>; jian he <[email protected]>; Jim Jones <[email protected]>; David G. Johnston <[email protected]>; Yugo NAGATA <[email protected]>; PostgreSQL Hackers <[email protected]>

On Mon, 11 Nov 2024 at 16:11, torikoshia <[email protected]> wrote:
>
> On 2024-11-09 21:55, Kirill Reshke wrote:
>
> Thanks for working on this!

Thanks for reviewing the v7 patch series!

> > On Thu, 7 Nov 2024 at 23:00, Fujii Masao <[email protected]>
> > wrote:
> >>
> >>
> >>
> >> On 2024/10/26 6:03, Kirill Reshke wrote:
> >> > when the REJECT LIMIT is set to some non-zero number and the number of
> >> > row NULL replacements exceeds the limit, is it OK to fail. Because
> >> > there WAS errors, and we should not tolerate more than $limit errors .
> >> > I do find this behavior to be consistent.
> >>
> >> +1
> >>
> >>
> >> > But what if we don't set a REJECT LIMIT, it is sane to do all
> >> > replacements, as if REJECT LIMIT is inf.
> >>
> >> +1
> >
> > After thinking for a while, I'm now more opposed to this approach. I
> > think we should count rows with erroneous data as errors only if
> > null substitution for these rows failed, not the total number of rows
> > which were modified.
> > Then, to respect the REJECT LIMIT option, we compare this number with
> > the limit. This is actually simpler approach IMHO. What do You think?
>
> IMHO I prefer the previous interpretation.
> I'm not sure this is what people expect, but I assume that REJECT_LIMIT
> is used to specify how many malformed rows are acceptable in the
> "original" data source.

I do like the first version of interpretation, but I have a struggle
with it. According to this interpretation, we will fail COPY command
if the number
of malformed data rows exceeds the limit, not the number of rejected
rows (some percentage of malformed rows are accepted with null
substitution)
So, a proper name for the limit will be MALFORMED_LIMIT, or something.
However, we are unable to change the name since the REJECT_LIMIT
option has already been committed.
I guess I'll just have to put up with this contradiction. I will send
an updated patch shortly...


> >> > But while I was trying to implement that, I realized that I don't
> >> > understand v4 of this patch. My misunderstanding is about
> >> > `t_on_error_null` tests. We are allowed to insert a NULL value for the
> >> > first column of t_on_error_null using COPY ON_ERROR SET_TO_NULL. Why
> >> > do we do that? My thought is we should try to execute
> >> > InputFunctionCallSafe with NULL value  (i mean, here [1]) for the
> >> > column after we failed to insert the input value. And, if this second
> >> > call is successful, we do replacement, otherwise we count the row as
> >> > erroneous.
> >>
> >> Your concern is valid. Allowing NULL to be stored in a column with a
> >> NOT NULL
> >> constraint via COPY ON_ERROR=SET_TO_NULL does seem unexpected. As you
> >> suggested,
> >> NULL values set by SET_TO_NULL should probably be re-evaluated.
> >
> > Thank you. I updated the patch with a NULL re-evaluation.
> >
> >
> > PFA v7. I did not yet update the doc for this patch version, waiting
> > for feedback about REJECT LIMIT + SET_TO_NULL behaviour.
> >
>
>
> There were warnings when I applied the patch:
>
> $ git apply
> v7-0001-Incrtoduce-COPY-option-to-replace-columns-contain.patch
> v7-0001-Incrtoduce-COPY-option-to-replace-columns-contain.patch:170:
> trailing whitespace.
>                     /*
> v7-0001-Incrtoduce-COPY-option-to-replace-columns-contain.patch:181:
> trailing whitespace.
>
> v7-0001-Incrtoduce-COPY-option-to-replace-columns-contain.patch:189:
> trailing whitespace.
>                                     /* If datatype if okay with NULL,
> replace
> v7-0001-Incrtoduce-COPY-option-to-replace-columns-contain.patch:196:
> trailing whitespace.
>
> v7-0001-Incrtoduce-COPY-option-to-replace-columns-contain.patch:212:
> trailing whitespace.
>                                  /*
>
> >  @@ -403,12 +403,14 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState
> > *pstate, bool is_from)
> >                   parser_errposition(pstate, def->location)));
> ...
> >
> >  -   if (opts_out->reject_limit && !opts_out->on_error)
> >  +   if (opts_out->reject_limit && !(opts_out->on_error ==
> > COPY_ON_ERROR_NULL || opts_out->on_error == COPY_ON_ERROR_IGNORE))
>
> Hmm, is this change necessary?
> Personally, I feel the previous code is easier to read.
>
>
> "REJECT LIMIT" should be "REJECT_LIMIT"?
> > 1037                             errhint("Consider specifying the
> > REJECT LIMIT option to skip erroneous rows.")));
>
>
> SET_TO_NULL is one of the value for ON_ERROR, but the patch treats
> SET_TO_NULL as option for COPY:
>
> 221 --- a/src/bin/psql/tab-complete.in.c
> 222 +++ b/src/bin/psql/tab-complete.in.c
> 223 @@ -3235,7 +3235,7 @@ match_previous_words(int pattern_id,
> 224         COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL",
> 225                       "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE",
> 226                       "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING",
> "DEFAULT",
> 227 -                     "ON_ERROR", "LOG_VERBOSITY");
> 228 +                     "ON_ERROR", "SET_TO_NULL", "LOG_VERBOSITY");
>
>
> > Best regards,
> > Kirill Reshke
>
> --
> Regards,
>
> --
> Atsushi Torikoshi
> Seconded from NTT DATA GROUP CORPORATION to SRA OSS K.K.



-- 
Best regards,
Kirill Reshke






^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-11-12 05:03  Yugo Nagata <[email protected]>
  parent: Kirill Reshke <[email protected]>
  0 siblings, 0 replies; 29+ messages in thread

From: Yugo Nagata @ 2024-11-12 05:03 UTC (permalink / raw)
  To: Kirill Reshke <[email protected]>; +Cc: torikoshia <[email protected]>; Fujii Masao <[email protected]>; jian he <[email protected]>; Jim Jones <[email protected]>; David G. Johnston <[email protected]>; PostgreSQL Hackers <[email protected]>

On Tue, 12 Nov 2024 01:27:53 +0500
Kirill Reshke <[email protected]> wrote:

> On Mon, 11 Nov 2024 at 16:11, torikoshia <[email protected]> wrote:
> >
> > On 2024-11-09 21:55, Kirill Reshke wrote:
> >
> > Thanks for working on this!
> 
> Thanks for reviewing the v7 patch series!
> 
> > > On Thu, 7 Nov 2024 at 23:00, Fujii Masao <[email protected]>
> > > wrote:
> > >>
> > >>
> > >>
> > >> On 2024/10/26 6:03, Kirill Reshke wrote:
> > >> > when the REJECT LIMIT is set to some non-zero number and the number of
> > >> > row NULL replacements exceeds the limit, is it OK to fail. Because
> > >> > there WAS errors, and we should not tolerate more than $limit errors .
> > >> > I do find this behavior to be consistent.
> > >>
> > >> +1
> > >>
> > >>
> > >> > But what if we don't set a REJECT LIMIT, it is sane to do all
> > >> > replacements, as if REJECT LIMIT is inf.
> > >>
> > >> +1
> > >
> > > After thinking for a while, I'm now more opposed to this approach. I
> > > think we should count rows with erroneous data as errors only if
> > > null substitution for these rows failed, not the total number of rows
> > > which were modified.
> > > Then, to respect the REJECT LIMIT option, we compare this number with
> > > the limit. This is actually simpler approach IMHO. What do You think?
> >
> > IMHO I prefer the previous interpretation.
> > I'm not sure this is what people expect, but I assume that REJECT_LIMIT
> > is used to specify how many malformed rows are acceptable in the
> > "original" data source.

I also prefer the previous version.
 
> I do like the first version of interpretation, but I have a struggle
> with it. According to this interpretation, we will fail COPY command
> if the number
> of malformed data rows exceeds the limit, not the number of rejected
> rows (some percentage of malformed rows are accepted with null
> substitution)
> So, a proper name for the limit will be MALFORMED_LIMIT, or something.
> However, we are unable to change the name since the REJECT_LIMIT
> option has already been committed.
> I guess I'll just have to put up with this contradiction. I will send
> an updated patch shortly...

I think we can rename the REJECT_LIMIT option because it is not yet released.

The documentation says that REJECT_LIMIT "Specifies the maximum number of errors",
and there are no wording "reject" in the description, so I wonder it is unclear
what means in "REJECT" in REJECT_LIMIT. It may be proper to use ERROR_LIMIT
since it is supposed to be used with ON_ERROR. 

Alternatively, if we emphasize that errors are handled other than terminating
the command,perhaps MALFORMED_LIMIT as proposed above or TOLERANCE_LIMIT may be
good, for example.

Regards,
Yugo Nagata

-- 
Yugo Nagata <[email protected]>






^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-11-16 08:26  jian he <[email protected]>
  parent: Kirill Reshke <[email protected]>
  1 sibling, 1 reply; 29+ messages in thread

From: jian he @ 2024-11-16 08:26 UTC (permalink / raw)
  To: Kirill Reshke <[email protected]>; +Cc: Fujii Masao <[email protected]>; Jim Jones <[email protected]>; David G. Johnston <[email protected]>; Yugo NAGATA <[email protected]>; torikoshia <[email protected]>; PostgreSQL Hackers <[email protected]>

On Sat, Nov 9, 2024 at 8:55 PM Kirill Reshke <[email protected]> wrote:
>

> > > But while I was trying to implement that, I realized that I don't
> > > understand v4 of this patch. My misunderstanding is about
> > > `t_on_error_null` tests. We are allowed to insert a NULL value for the
> > > first column of t_on_error_null using COPY ON_ERROR SET_TO_NULL. Why
> > > do we do that? My thought is we should try to execute
> > > InputFunctionCallSafe with NULL value  (i mean, here [1]) for the
> > > column after we failed to insert the input value. And, if this second
> > > call is successful, we do replacement, otherwise we count the row as
> > > erroneous.
> >
> > Your concern is valid. Allowing NULL to be stored in a column with a NOT NULL
> > constraint via COPY ON_ERROR=SET_TO_NULL does seem unexpected. As you suggested,
> > NULL values set by SET_TO_NULL should probably be re-evaluated.
>
> Thank you. I updated the patch with a NULL re-evaluation.
>


take me sometime to understand your change with InputFunctionCallSafe.
it actually works fine with domain,
i think mainly because domain_in proisstrict is false and all other
type input function proisstrict is true!


--case1
create table t1(a dnn);
copy t1 from stdin(on_error set_to_null);
A
\.

--case2
create table t2(a int not null);
copy t2 from stdin(on_error set_to_null);
A
\.

I think it should be to either let domains with not-null behave the
same as column level not-null
or just insert NULL to a column with domain not-null constraint.


in doc[1], we already mentioned that a column with a not-null domain
is possible to have null value .
https://www.postgresql.org/docs/current/sql-createdomain.html

attached v8, based on your v7, main change it NextCopyFrom,
InputFunctionCallSafe.
The idea is when InputFunctionCallSafe fails, on_error set_to_null
needs to check if this is a type as not-null domain.
pass NULL string to InputFunctionCallSafe again to check if this type
allows null or not.
If not allow null then error out (ereport(ERROR)).
i think this will align with column level not-null constraint, what do
you guys think?


i am mainly change copyfromparse.c's for now.
other places no change, same as v7.


Attachments:

  [text/x-patch] v8-0001-COPY-option-on_error-set_to_null.patch (20.2K, 2-v8-0001-COPY-option-on_error-set_to_null.patch)
  download | inline diff:
From d0553ec7446f0f352a281c591db228029c0946c4 Mon Sep 17 00:00:00 2001
From: jian he <[email protected]>
Date: Sat, 16 Nov 2024 15:57:59 +0800
Subject: [PATCH v8 1/1] COPY option on_error set_to_null

extent "on_error action", introduce new option: on_error set_to_null.

Due to current grammar, we cannot use "on_error null", so I choose on_error
set_to_null.

any data type conversion errors while the COPY FROM process will set that column
value to be NULL.  this will only work with COPY FROM and non-binary format.

However this will respect the not-null constraint, meaning, if you actually
converted error to null, but the column has not-null constraint, not-null
constraint violation ERROR will be raised.

this also respect not-null constraint on domain, meaning on_error set_to_null
may raise ERROR for failed domain_in function call

discussion: https://postgr.es/m/CAKFQuwawy1e6YR4S=j+y7pXqg_Dw1WBVrgvf=BP3d1_aSfe_+Q@mail.gmail.com
---
 doc/src/sgml/ref/copy.sgml           |  7 +--
 src/backend/commands/copy.c          | 12 +++--
 src/backend/commands/copyfrom.c      | 39 ++++++++++----
 src/backend/commands/copyfromparse.c | 40 ++++++++++++++
 src/bin/psql/tab-complete.in.c       |  2 +-
 src/include/commands/copy.h          |  1 +
 src/test/regress/expected/copy2.out  | 80 +++++++++++++++++++++++++++-
 src/test/regress/sql/copy2.sql       | 79 +++++++++++++++++++++++++++
 8 files changed, 239 insertions(+), 21 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 8394402f09..dcbfa17a3c 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -394,12 +394,13 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       Specifies how to behave when encountering an error converting a column's
       input value into its data type.
       An <replaceable class="parameter">error_action</replaceable> value of
-      <literal>stop</literal> means fail the command, while
-      <literal>ignore</literal> means discard the input row and continue with the next one.
+      <literal>stop</literal> means fail the command,
+      <literal>ignore</literal> means discard the input row and continue with the next one, and
+      <literal>set_to_null</literal> means replace columns containing erroneous input values with <literal>null</literal> and move to the next row.
       The default is <literal>stop</literal>.
      </para>
      <para>
-      The <literal>ignore</literal> option is applicable only for <command>COPY FROM</command>
+      The <literal>ignore</literal> and <literal>set_to_null</literal> options are applicable only for <command>COPY FROM</command>
       when the <literal>FORMAT</literal> is <literal>text</literal> or <literal>csv</literal>.
      </para>
      <para>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 3485ba8663..304022cd86 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -403,12 +403,14 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
 				 parser_errposition(pstate, def->location)));
 
 	/*
-	 * Allow "stop", or "ignore" values.
+	 * Allow "stop", "ignore", "set_to_null" values.
 	 */
 	if (pg_strcasecmp(sval, "stop") == 0)
 		return COPY_ON_ERROR_STOP;
 	if (pg_strcasecmp(sval, "ignore") == 0)
 		return COPY_ON_ERROR_IGNORE;
+	if (pg_strcasecmp(sval, "set_to_null") == 0)
+		return COPY_ON_ERROR_NULL;
 
 	ereport(ERROR,
 			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -904,13 +906,13 @@ ProcessCopyOptions(ParseState *pstate,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("only ON_ERROR STOP is allowed in BINARY mode")));
 
-	if (opts_out->reject_limit && !opts_out->on_error)
+	if (opts_out->reject_limit && !(opts_out->on_error == COPY_ON_ERROR_NULL || opts_out->on_error == COPY_ON_ERROR_IGNORE))
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 		/*- translator: first and second %s are the names of COPY option, e.g.
-		 * ON_ERROR, third is the value of the COPY option, e.g. IGNORE */
-				 errmsg("COPY %s requires %s to be set to %s",
-						"REJECT_LIMIT", "ON_ERROR", "IGNORE")));
+		 * ON_ERROR, third is the value of the COPY option, e.g. IGNORE or SET_TO_NULL */
+				 errmsg("COPY %s requires %s to be set to %s or %s",
+						"REJECT_LIMIT", "ON_ERROR", "IGNORE", "SET_TO_NULL")));
 }
 
 /*
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 07cbd5d22b..f00f383baa 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1003,7 +1003,7 @@ CopyFrom(CopyFromState cstate)
 		if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
 			break;
 
-		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE &&
+		if ((cstate->opts.on_error == COPY_ON_ERROR_IGNORE || cstate->opts.on_error == COPY_ON_ERROR_NULL) &&
 			cstate->escontext->error_occurred)
 		{
 			/*
@@ -1018,12 +1018,28 @@ CopyFrom(CopyFromState cstate)
 			pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED,
 										 cstate->num_errors);
 
-			if (cstate->opts.reject_limit > 0 && \
-				cstate->num_errors > cstate->opts.reject_limit)
-				ereport(ERROR,
-						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-						 errmsg("skipped more than REJECT_LIMIT (%lld) rows due to data type incompatibility",
-								(long long) cstate->opts.reject_limit)));
+			if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
+			{
+				if (cstate->opts.reject_limit > 0 && cstate->num_errors > cstate->opts.reject_limit)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							errmsg("skipped more than REJECT_LIMIT (%lld) rows due to data type incompatibility",
+									(long long) cstate->opts.reject_limit)));
+			}
+			else
+			{
+				/* Provide different error msg if reject_limit is zero */
+				if (cstate->opts.reject_limit == 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							errmsg("failed to replace column containing erroneous data with null"),
+							errhint("Consider specifying the REJECT LIMIT option to skip erroneous rows.")));
+				else if (cstate->num_errors > cstate->opts.reject_limit)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							errmsg("encountered more than REJECT_LIMIT (%lld) rows with data type incompatibility",
+									(long long) cstate->opts.reject_limit)));
+			}
 
 			/* Repeat NextCopyFrom() until no soft error occurs */
 			continue;
@@ -1321,7 +1337,7 @@ CopyFrom(CopyFromState cstate)
 	/* Done, clean up */
 	error_context_stack = errcallback.previous;
 
-	if (cstate->opts.on_error != COPY_ON_ERROR_STOP &&
+	if ((cstate->opts.on_error == COPY_ON_ERROR_IGNORE || cstate->opts.on_error == COPY_ON_ERROR_NULL) &&
 		cstate->num_errors > 0 &&
 		cstate->opts.log_verbosity >= COPY_LOG_VERBOSITY_DEFAULT)
 		ereport(NOTICE,
@@ -1474,10 +1490,11 @@ BeginCopyFrom(ParseState *pstate,
 		cstate->escontext->error_occurred = false;
 
 		/*
-		 * Currently we only support COPY_ON_ERROR_IGNORE. We'll add other
-		 * options later
+		 * Currently we only support COPY_ON_ERROR_IGNORE, COPY_ON_ERROR_NULL.
+		 * We'll add other options later
 		 */
-		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
+		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE ||
+			cstate->opts.on_error == COPY_ON_ERROR_NULL)
 			cstate->escontext->details_wanted = false;
 	}
 	else
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index d1d43b53d8..142fdf3fbb 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -960,6 +960,46 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 			{
 				Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
 
+				/*
+				* We encountered an error while parsing one of attributes.
+				*/
+				if (cstate->opts.on_error == COPY_ON_ERROR_NULL)
+				{
+					/*
+					* Temporary unset error_occurred. for basetype, null value
+					* is allowed, for domain type, we may have not-null
+					* constraint. we pass NULL to InputFunctionCallSafe to check
+					* if this type have not-null constraint or not. If it's
+					* domain with not-null constraint, then we have to error
+					* out, that would behave consistent with column level
+					* not-null constraint
+					*/
+					cstate->escontext->error_occurred = false;
+
+					if (!InputFunctionCallSafe(&in_functions[m],
+											NULL,
+											typioparams[m],
+											att->atttypmod,
+											(Node *) cstate->escontext,
+											&values[m]))
+						ereport(ERROR,
+								(errcode(ERRCODE_NOT_NULL_VIOLATION),
+								 errmsg("domain %s does not allow null values",
+										format_type_be(typioparams[m])),
+								 errdatatype(typioparams[m])));
+
+					/* If datatype if okay with NULL, replace
+					* with null
+					*/
+					nulls[m] = true;
+					values[m] = (Datum) 0;
+					continue;
+				}
+
+				/*
+				* Update copy state counter for number of erroneous rows
+				* as we are going to return from function.
+				*/
 				cstate->num_errors++;
 
 				if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c
index fad2277991..c2902ffc33 100644
--- a/src/bin/psql/tab-complete.in.c
+++ b/src/bin/psql/tab-complete.in.c
@@ -3235,7 +3235,7 @@ match_previous_words(int pattern_id,
 		COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL",
 					  "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE",
 					  "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT",
-					  "ON_ERROR", "LOG_VERBOSITY");
+					  "ON_ERROR", "SET_TO_NULL", "LOG_VERBOSITY");
 
 	/* Complete COPY <sth> FROM|TO filename WITH (FORMAT */
 	else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT"))
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 4002a7f538..051ca12d10 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -38,6 +38,7 @@ typedef enum CopyOnErrorChoice
 {
 	COPY_ON_ERROR_STOP = 0,		/* immediately throw errors, default */
 	COPY_ON_ERROR_IGNORE,		/* ignore errors */
+	COPY_ON_ERROR_NULL,			/* set error field to null */
 } CopyOnErrorChoice;
 
 /*
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 64ea33aeae..62cbe0c2b3 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -81,6 +81,10 @@ COPY x from stdin (on_error ignore, on_error ignore);
 ERROR:  conflicting or redundant options
 LINE 1: COPY x from stdin (on_error ignore, on_error ignore);
                                             ^
+COPY x from stdin (on_error set_to_null, on_error set_to_null);
+ERROR:  conflicting or redundant options
+LINE 1: COPY x from stdin (on_error set_to_null, on_error set_to_nul...
+                                                 ^
 COPY x from stdin (log_verbosity default, log_verbosity verbose);
 ERROR:  conflicting or redundant options
 LINE 1: COPY x from stdin (log_verbosity default, log_verbosity verb...
@@ -92,6 +96,8 @@ COPY x from stdin (format BINARY, null 'x');
 ERROR:  cannot specify NULL in BINARY mode
 COPY x from stdin (format BINARY, on_error ignore);
 ERROR:  only ON_ERROR STOP is allowed in BINARY mode
+COPY x from stdin (format BINARY, on_error set_to_null);
+ERROR:  only ON_ERROR STOP is allowed in BINARY mode
 COPY x from stdin (on_error unsupported);
 ERROR:  COPY ON_ERROR "unsupported" not recognized
 LINE 1: COPY x from stdin (on_error unsupported);
@@ -124,12 +130,16 @@ COPY x to stdout (format BINARY, on_error unsupported);
 ERROR:  COPY ON_ERROR cannot be used with COPY TO
 LINE 1: COPY x to stdout (format BINARY, on_error unsupported);
                                          ^
+COPY x to stdin (on_error set_to_null);
+ERROR:  COPY ON_ERROR cannot be used with COPY TO
+LINE 1: COPY x to stdin (on_error set_to_null);
+                         ^
 COPY x from stdin (log_verbosity unsupported);
 ERROR:  COPY LOG_VERBOSITY "unsupported" not recognized
 LINE 1: COPY x from stdin (log_verbosity unsupported);
                            ^
 COPY x from stdin with (reject_limit 1);
-ERROR:  COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
+ERROR:  COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE or SET_TO_NULL
 COPY x from stdin with (on_error ignore, reject_limit 0);
 ERROR:  REJECT_LIMIT (0) must be greater than zero
 -- too many columns in column list: should fail
@@ -769,6 +779,46 @@ CONTEXT:  COPY check_ign_err
 NOTICE:  skipping row due to data type incompatibility at line 8 for column "k": "a"
 CONTEXT:  COPY check_ign_err
 NOTICE:  6 rows were skipped due to data type incompatibility
+CREATE DOMAIN d_int_not_null AS INT NOT NULL CHECK(value > 0);
+CREATE DOMAIN d_int_positive_maybe_null AS INT CHECK(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, b d_int_positive_maybe_null, c INT);
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+-- check inserted content
+TABLE t_on_error_null;
+ a  | b  | c  
+----+----+----
+ 11 |    | 12
+  1 | 11 |   
+(2 rows)
+
+--fail. we do check domain not-null constraint
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+ERROR:  domain d_int_not_null does not allow null values
+CONTEXT:  COPY t_on_error_null, line 1, column a: "a"
+--fail. first check constraint fails, then we convert column a value to null
+-- by on_error set_to_nul but column a domain type not allow null value.
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+ERROR:  domain d_int_not_null does not allow null values
+CONTEXT:  COPY t_on_error_null, line 1, column a: "-1"
+--ok. Check interaction with REJECT LIMIT
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null, reject_limit 2);
+ERROR:  domain d_int_not_null does not allow null values
+CONTEXT:  COPY t_on_error_null, line 1, column a: "-1"
+-- check inserted content
+TABLE t_on_error_null;
+ a  | b  | c  
+----+----+----
+ 11 |    | 12
+  1 | 11 |   
+(2 rows)
+
+--fail. Check interaction with REJECT LIMIT
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null, reject_limit 2);
+ERROR:  domain d_int_not_null does not allow null values
+CONTEXT:  COPY t_on_error_null, line 1, column a: "-1"
 -- tests for on_error option with log_verbosity and null constraint via domain
 CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
 CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -776,6 +826,10 @@ COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose);
 NOTICE:  skipping row due to data type incompatibility at line 2 for column "l": null input
 CONTEXT:  COPY check_ign_err2
 NOTICE:  1 row was skipped due to data type incompatibility
+-- check null substitution massages.
+COPY check_ign_err2 FROM STDIN WITH (on_error set_to_null, reject_limit 2, log_verbosity verbose);
+ERROR:  domain dcheck_ign_err2 does not allow null values
+CONTEXT:  COPY check_ign_err2, line 2, column l: null input
 COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity silent);
 -- reset context choice
 \set SHOW_CONTEXT errors
@@ -813,6 +867,28 @@ ERROR:  skipped more than REJECT_LIMIT (3) rows due to data type incompatibility
 CONTEXT:  COPY check_ign_err, line 5, column n: ""
 COPY check_ign_err FROM STDIN WITH (on_error ignore, reject_limit 4);
 NOTICE:  4 rows were skipped due to data type incompatibility
+-- tests for on_error set_to_null option
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+\pset null NULL
+SELECT * FROM check_ign_err;
+  n   |  m  |  k   
+------+-----+------
+    1 | {1} | NULL
+    2 | {2} |    1
+    3 | {3} |    2
+    4 | {4} | NULL
+ NULL | {5} | NULL
+(5 rows)
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  missing data for column "k"
+CONTEXT:  COPY check_ign_err, line 1, column m: ""
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  extra data after last expected column
+CONTEXT:  COPY check_ign_err, line 1: "1,{1},1,1"
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
@@ -828,6 +904,8 @@ DROP VIEW instead_of_insert_tbl_view;
 DROP VIEW instead_of_insert_tbl_view_2;
 DROP FUNCTION fun_instead_of_insert_tbl();
 DROP TABLE check_ign_err;
+DROP TABLE t_on_error_null;
+DROP DOMAIN d_int_not_null;
 DROP TABLE check_ign_err2;
 DROP DOMAIN dcheck_ign_err2;
 DROP TABLE hard_err;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 45273557ce..c9884cc169 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -67,12 +67,14 @@ COPY x from stdin (force_null (a), force_null (b));
 COPY x from stdin (convert_selectively (a), convert_selectively (b));
 COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii');
 COPY x from stdin (on_error ignore, on_error ignore);
+COPY x from stdin (on_error set_to_null, on_error set_to_null);
 COPY x from stdin (log_verbosity default, log_verbosity verbose);
 
 -- incorrect options
 COPY x from stdin (format BINARY, delimiter ',');
 COPY x from stdin (format BINARY, null 'x');
 COPY x from stdin (format BINARY, on_error ignore);
+COPY x from stdin (format BINARY, on_error set_to_null);
 COPY x from stdin (on_error unsupported);
 COPY x from stdin (format TEXT, force_quote(a));
 COPY x from stdin (format TEXT, force_quote *);
@@ -87,6 +89,7 @@ COPY x from stdin (format TEXT, force_null *);
 COPY x to stdout (format CSV, force_null(a));
 COPY x to stdout (format CSV, force_null *);
 COPY x to stdout (format BINARY, on_error unsupported);
+COPY x to stdin (on_error set_to_null);
 COPY x from stdin (log_verbosity unsupported);
 COPY x from stdin with (reject_limit 1);
 COPY x from stdin with (on_error ignore, reject_limit 0);
@@ -534,6 +537,51 @@ a	{2}	2
 8	{8}	8
 \.
 
+CREATE DOMAIN d_int_not_null AS INT NOT NULL CHECK(value > 0);
+CREATE DOMAIN d_int_positive_maybe_null AS INT CHECK(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, b d_int_positive_maybe_null, c INT);
+
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+11	a	12
+\.
+
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+1	11	d
+\.
+
+-- check inserted content
+TABLE t_on_error_null;
+
+--fail. we do check domain not-null constraint
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+a	11	14
+\.
+
+--fail. first check constraint fails, then we convert column a value to null
+-- by on_error set_to_nul but column a domain type not allow null value.
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+-1	11	13
+\.
+
+--ok. Check interaction with REJECT LIMIT
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null, reject_limit 2);
+-1	11	13
+a	11	14
+1	11	14
+\.
+
+-- check inserted content
+TABLE t_on_error_null;
+
+--fail. Check interaction with REJECT LIMIT
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null, reject_limit 2);
+-1	11	13
+a	11	14
+\N	11	14
+\.
+
 -- tests for on_error option with log_verbosity and null constraint via domain
 CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
 CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -541,6 +589,12 @@ COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose);
 1	{1}	1	'foo'
 2	{2}	2	\N
 \.
+-- check null substitution massages.
+COPY check_ign_err2 FROM STDIN WITH (on_error set_to_null, reject_limit 2, log_verbosity verbose);
+1	{1}	foo	'foo'
+2	{2}	2	\N
+2	{2}	2	'foooooooooooooooo'
+\.
 COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity silent);
 3	{3}	3	'bar'
 4	{4}	4	\N
@@ -588,6 +642,29 @@ a	{7}	7
 10	{10}	10
 \.
 
+-- tests for on_error set_to_null option
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+1	{1}	a
+2	{2}	1
+3	{3}	2
+4	{4}	b
+a	{5}	c
+\.
+
+\pset null NULL
+SELECT * FROM check_ign_err;
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,
+\.
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,{1},1,1
+\.
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
@@ -603,6 +680,8 @@ DROP VIEW instead_of_insert_tbl_view;
 DROP VIEW instead_of_insert_tbl_view_2;
 DROP FUNCTION fun_instead_of_insert_tbl();
 DROP TABLE check_ign_err;
+DROP TABLE t_on_error_null;
+DROP DOMAIN d_int_not_null;
 DROP TABLE check_ign_err2;
 DROP DOMAIN dcheck_ign_err2;
 DROP TABLE hard_err;
-- 
2.34.1



^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-11-16 09:55  Kirill Reshke <[email protected]>
  parent: jian he <[email protected]>
  0 siblings, 1 reply; 29+ messages in thread

From: Kirill Reshke @ 2024-11-16 09:55 UTC (permalink / raw)
  To: jian he <[email protected]>; +Cc: Fujii Masao <[email protected]>; Jim Jones <[email protected]>; David G. Johnston <[email protected]>; Yugo NAGATA <[email protected]>; torikoshia <[email protected]>; PostgreSQL Hackers <[email protected]>

On Sat, 16 Nov 2024 at 13:27, jian he <[email protected]> wrote:
>
> On Sat, Nov 9, 2024 at 8:55 PM Kirill Reshke <[email protected]> wrote:
> >
>
> > > > But while I was trying to implement that, I realized that I don't
> > > > understand v4 of this patch. My misunderstanding is about
> > > > `t_on_error_null` tests. We are allowed to insert a NULL value for the
> > > > first column of t_on_error_null using COPY ON_ERROR SET_TO_NULL. Why
> > > > do we do that? My thought is we should try to execute
> > > > InputFunctionCallSafe with NULL value  (i mean, here [1]) for the
> > > > column after we failed to insert the input value. And, if this second
> > > > call is successful, we do replacement, otherwise we count the row as
> > > > erroneous.
> > >
> > > Your concern is valid. Allowing NULL to be stored in a column with a NOT NULL
> > > constraint via COPY ON_ERROR=SET_TO_NULL does seem unexpected. As you suggested,
> > > NULL values set by SET_TO_NULL should probably be re-evaluated.
> >
> > Thank you. I updated the patch with a NULL re-evaluation.
> >
>
>
> take me sometime to understand your change with InputFunctionCallSafe.
> it actually works fine with domain,
> i think mainly because domain_in proisstrict is false and all other
> type input function proisstrict is true!
>
>
> --case1
> create table t1(a dnn);
> copy t1 from stdin(on_error set_to_null);
> A
> \.
>
> --case2
> create table t2(a int not null);
> copy t2 from stdin(on_error set_to_null);
> A
> \.
>
> I think it should be to either let domains with not-null behave the
> same as column level not-null
> or just insert NULL to a column with domain not-null constraint.
>
>
> in doc[1], we already mentioned that a column with a not-null domain
> is possible to have null value .
> https://www.postgresql.org/docs/current/sql-createdomain.html
>
> attached v8, based on your v7, main change it NextCopyFrom,
> InputFunctionCallSafe.
> The idea is when InputFunctionCallSafe fails, on_error set_to_null
> needs to check if this is a type as not-null domain.
> pass NULL string to InputFunctionCallSafe again to check if this type
> allows null or not.
> If not allow null then error out (ereport(ERROR)).
> i think this will align with column level not-null constraint, what do
> you guys think?
>
>
> i am mainly change copyfromparse.c's for now.
> other places no change, same as v7.

Hello. I received your email just as I was ready to send my version
eight of this thread.

Your patch does not apply due to 9a70f67.

```
reshke@ygp-jammy:~/pg$ git apply v8-0001-COPY-option-on_error-set_to_null.patch
error: patch failed: src/backend/commands/copyfrom.c:1018
error: src/backend/commands/copyfrom.c: patch does not apply
```

1) Your v8 does not fix tab-complete issue mentioned by Atsushi
Torikoshi in [1].

2) Your version does not address discussion about SET_TO_NULL vs
REJECT_LIMIT (see [2] & [3]). I am leaning towards option 1 from [3],
and my v8 implements that.

```
reshke=# create domain dd as int not null;
CREATE DOMAIN
reshke=# create table tt(i dd);
CREATE TABLE
reshke=# copy tt from stdin with (on_error set_to_null, log_verbosity
verbose, reject_limit 2);
Enter data to be copied followed by a newline.
End with a backslash and a period on a line by itself, or an EOF signal.
>> s
>> 1
>> \.
ERROR:  domain dd does not allow null values
CONTEXT:  COPY tt, line 1, column i: "s"
reshke=#
```

I expect no error here, as reject_limit is specified.

Regression test that checks for this in v7 were changed, in my
opinion, incorrectly.

I am attaching my v8 for reference.


[1] https://www.postgresql.org/message-id/501dd655ddb04693c15baeb6485bc601%40oss.nttdata.com
[2] https://www.postgresql.org/message-id/07587c36-18b3-4ccb-b5fb-579bcb04ed37%40oss.nttdata.com
[3] https://www.postgresql.org/message-id/1462d79784b2475f1c714c65a6f25652%40oss.nttdata.com
-- 
Best regards,
Kirill Reshke


Attachments:

  [application/octet-stream] v8-0001-Introduce-COPY-option-to-replace-columns-containi.patch (24.7K, 2-v8-0001-Introduce-COPY-option-to-replace-columns-containi.patch)
  download | inline diff:
From df39b38589fc8384e0590d76f8b0e2e4295f6f50 Mon Sep 17 00:00:00 2001
From: jian he <[email protected]>
Date: Thu, 12 Sep 2024 17:07:02 +0800
Subject: [PATCH v8] Introduce COPY option to replace columns containing
 erroneous data with null

extent "on_error action", introduce new option: on_error set_to_null.

Due to current grammar, we cannot use "on_error null",
so I choose on_error set_to_null.

any data type conversion errors while the COPY FROM process will set that column value to be NULL.
this will only work with COPY FROM and non-binary format.

However this will respect the not-null constraint, meaning, if you actually converted error to null,
but the column has not-null constraint, not-null constraint violation ERROR will be raised.

discussion: https://postgr.es/m/CAKFQuwawy1e6YR4S=j+y7pXqg_Dw1WBVrgvf=BP3d1_aSfe_+Q@mail.gmail.com
---
 doc/src/sgml/ref/copy.sgml           |   9 +--
 src/backend/commands/copy.c          |  12 ++--
 src/backend/commands/copyfrom.c      |  53 +++++++++++---
 src/backend/commands/copyfromparse.c |  69 +++++++++++++++++-
 src/bin/psql/tab-complete.in.c       |   2 +-
 src/include/commands/copy.h          |   1 +
 src/test/regress/expected/copy2.out  | 100 ++++++++++++++++++++++++++-
 src/test/regress/sql/copy2.sql       |  82 ++++++++++++++++++++++
 8 files changed, 303 insertions(+), 25 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 8394402f096..bce10ea62e5 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -394,12 +394,13 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       Specifies how to behave when encountering an error converting a column's
       input value into its data type.
       An <replaceable class="parameter">error_action</replaceable> value of
-      <literal>stop</literal> means fail the command, while
-      <literal>ignore</literal> means discard the input row and continue with the next one.
+      <literal>stop</literal> means fail the command,
+      <literal>ignore</literal> means discard the input row and continue with the next one, and
+      <literal>set_to_null</literal> means replace columns containing erroneous input values with <literal>null</literal> and move to the next row.
       The default is <literal>stop</literal>.
      </para>
      <para>
-      The <literal>ignore</literal> option is applicable only for <command>COPY FROM</command>
+      The <literal>ignore</literal> and <literal>set_to_null</literal> options are applicable only for <command>COPY FROM</command>
       when the <literal>FORMAT</literal> is <literal>text</literal> or <literal>csv</literal>.
      </para>
      <para>
@@ -422,7 +423,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       Specifies the maximum number of errors tolerated while converting a
       column's input value to its data type, when <literal>ON_ERROR</literal> is
       set to <literal>ignore</literal>.
-      If the input causes more errors than the specified value, the <command>COPY</command>
+      If the input contains more erroneous rows than the specified value, the <command>COPY</command>
       command fails, even with <literal>ON_ERROR</literal> set to <literal>ignore</literal>.
       This clause must be used with <literal>ON_ERROR</literal>=<literal>ignore</literal>
       and <replaceable class="parameter">maxerror</replaceable> must be positive <type>bigint</type>.
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 3485ba8663f..304022cd867 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -403,12 +403,14 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
 				 parser_errposition(pstate, def->location)));
 
 	/*
-	 * Allow "stop", or "ignore" values.
+	 * Allow "stop", "ignore", "set_to_null" values.
 	 */
 	if (pg_strcasecmp(sval, "stop") == 0)
 		return COPY_ON_ERROR_STOP;
 	if (pg_strcasecmp(sval, "ignore") == 0)
 		return COPY_ON_ERROR_IGNORE;
+	if (pg_strcasecmp(sval, "set_to_null") == 0)
+		return COPY_ON_ERROR_NULL;
 
 	ereport(ERROR,
 			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -904,13 +906,13 @@ ProcessCopyOptions(ParseState *pstate,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("only ON_ERROR STOP is allowed in BINARY mode")));
 
-	if (opts_out->reject_limit && !opts_out->on_error)
+	if (opts_out->reject_limit && !(opts_out->on_error == COPY_ON_ERROR_NULL || opts_out->on_error == COPY_ON_ERROR_IGNORE))
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 		/*- translator: first and second %s are the names of COPY option, e.g.
-		 * ON_ERROR, third is the value of the COPY option, e.g. IGNORE */
-				 errmsg("COPY %s requires %s to be set to %s",
-						"REJECT_LIMIT", "ON_ERROR", "IGNORE")));
+		 * ON_ERROR, third is the value of the COPY option, e.g. IGNORE or SET_TO_NULL */
+				 errmsg("COPY %s requires %s to be set to %s or %s",
+						"REJECT_LIMIT", "ON_ERROR", "IGNORE", "SET_TO_NULL")));
 }
 
 /*
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 754cb496169..557b00266ff 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1003,7 +1003,7 @@ CopyFrom(CopyFromState cstate)
 		if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
 			break;
 
-		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE &&
+		if ((cstate->opts.on_error == COPY_ON_ERROR_IGNORE || cstate->opts.on_error == COPY_ON_ERROR_NULL) &&
 			cstate->escontext->error_occurred)
 		{
 			/*
@@ -1018,12 +1018,30 @@ CopyFrom(CopyFromState cstate)
 			pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED,
 										 cstate->num_errors);
 
-			if (cstate->opts.reject_limit > 0 &&
-				cstate->num_errors > cstate->opts.reject_limit)
-				ereport(ERROR,
-						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-						 errmsg("skipped more than REJECT_LIMIT (%lld) rows due to data type incompatibility",
-								(long long) cstate->opts.reject_limit)));
+			if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
+			{
+				if (cstate->opts.reject_limit > 0 &&
+					cstate->num_errors > cstate->opts.reject_limit)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							errmsg("skipped more than REJECT_LIMIT (%lld) rows due to data type incompatibility",
+									(long long) cstate->opts.reject_limit)));
+			}
+			else
+			{
+				/* Provide different error msg if reject_limit is zero */
+				if (cstate->opts.reject_limit == 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							errmsg("failed to replace column containing erroneous data with null",
+									(long long) cstate->opts.reject_limit),
+							errhint("Consider specifying the REJECT_LIMIT option to skip erroneous rows.")));
+				else if (cstate->num_errors > cstate->opts.reject_limit)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							errmsg("encountered more than REJECT_LIMIT (%lld) rows with data type incompatibility",
+									(long long) cstate->opts.reject_limit)));
+			}
 
 			/* Repeat NextCopyFrom() until no soft error occurs */
 			continue;
@@ -1321,7 +1339,7 @@ CopyFrom(CopyFromState cstate)
 	/* Done, clean up */
 	error_context_stack = errcallback.previous;
 
-	if (cstate->opts.on_error != COPY_ON_ERROR_STOP &&
+	if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE &&
 		cstate->num_errors > 0 &&
 		cstate->opts.log_verbosity >= COPY_LOG_VERBOSITY_DEFAULT)
 		ereport(NOTICE,
@@ -1330,6 +1348,18 @@ CopyFrom(CopyFromState cstate)
 							  (unsigned long long) cstate->num_errors,
 							  (unsigned long long) cstate->num_errors));
 
+	/* In case on_error SET_TO_NULL, if COPY succeed, it means that
+	 * all erroneous rows attributes filled with NULL
+	 */
+	if (cstate->opts.on_error == COPY_ON_ERROR_NULL &&
+		cstate->num_errors > 0 &&
+		cstate->opts.log_verbosity >= COPY_LOG_VERBOSITY_DEFAULT)
+		ereport(NOTICE,
+				errmsg_plural("Erroneous values in %llu row were replaced with NULL",
+							  "Erroneous values in %llu rows were replaced with NULL",
+							  (unsigned long long) cstate->num_errors,
+							  (unsigned long long) cstate->num_errors));
+
 	if (bistate != NULL)
 		FreeBulkInsertState(bistate);
 
@@ -1474,10 +1504,11 @@ BeginCopyFrom(ParseState *pstate,
 		cstate->escontext->error_occurred = false;
 
 		/*
-		 * Currently we only support COPY_ON_ERROR_IGNORE. We'll add other
-		 * options later
+		 * Currently we only support COPY_ON_ERROR_IGNORE, COPY_ON_ERROR_NULL.
+		 * We'll add other options later
 		 */
-		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
+		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE ||
+			cstate->opts.on_error == COPY_ON_ERROR_NULL)
 			cstate->escontext->details_wanted = false;
 	}
 	else
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index d1d43b53d83..943926ad1d9 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -871,6 +871,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 		int			fldct;
 		int			fieldno;
 		char	   *string;
+		bool		current_row_erroneous = false;
 
 		/* read raw fields in the next line */
 		if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
@@ -949,7 +950,8 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 
 			/*
 			 * If ON_ERROR is specified with IGNORE, skip rows with soft
-			 * errors
+			 * errors. If ON_ERROR is specified with SET_TO_NULL, try
+			 * to replace attribute value with NULL.
 			 */
 			else if (!InputFunctionCallSafe(&in_functions[m],
 											string,
@@ -960,9 +962,63 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 			{
 				Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
 
-				cstate->num_errors++;
+				/*
+				* Regardless of NULL substrition success, we count
+				* current row as erroneous
+				*/
+				current_row_erroneous = true;
 
-				if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
+				/*
+				* We encountered an error while parsing one of attributes.
+				*
+				*/
+				if (cstate->opts.on_error == COPY_ON_ERROR_NULL && string != NULL)
+				{
+					/*
+					* Temporary unset error_occurred, for next InputFunctionCallSafe
+					* sanity. If null substitution for this attribute will
+					* succeed, we will accept this row.
+					*/
+					cstate->escontext->error_occurred = false;
+
+					if (InputFunctionCallSafe(&in_functions[m],
+											NULL,
+											typioparams[m],
+											att->atttypmod,
+											(Node *) cstate->escontext,
+											&values[m]))
+					{
+						/* If datatype if okay with NULL, replace
+						* with null
+						*/
+						nulls[m] = true;
+
+						if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
+							ereport(NOTICE,
+									errmsg("replaced row attribute \"%s\" with NULL due to data type incompatibility at line %llu.",
+										cstate->cur_attname, (unsigned long long) cstate->cur_lineno));
+						continue;
+					}
+
+					if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
+						ereport(NOTICE,
+								errmsg("failed to replace row attribute \"%s\" with NULL at line %llu.",
+									   cstate->cur_attname, (unsigned long long)cstate->cur_lineno));
+				}
+
+				/*
+				* Here we end processing of current COPY row.
+				* Update copy state counter for number of erroneous rows.
+				*/
+				cstate->num_errors++;
+				cstate->escontext->error_occurred = true;
+
+				/* Only print this NOTICE message, if it will not be followed by ERROR */
+				if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE &&
+					 (
+						 (cstate->opts.on_error == COPY_ON_ERROR_NULL && cstate->opts.reject_limit > 0 && cstate->num_errors <= cstate->opts.reject_limit) ||
+						 (cstate->opts.on_error == COPY_ON_ERROR_IGNORE && (cstate->opts.reject_limit ==  0 || cstate->num_errors <= cstate->opts.reject_limit))
+					 ))
 				{
 					/*
 					 * Since we emit line number and column info in the below
@@ -1001,6 +1057,13 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 			cstate->cur_attval = NULL;
 		}
 
+		/*
+		* Update copy state counter for number of erroneous rows.
+		* But do not set error_occurred, since row was actually accepted
+		*/
+		if (current_row_erroneous)
+			cstate->num_errors++;
+
 		Assert(fieldno == attr_count);
 	}
 	else
diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c
index fad2277991d..c2902ffc339 100644
--- a/src/bin/psql/tab-complete.in.c
+++ b/src/bin/psql/tab-complete.in.c
@@ -3235,7 +3235,7 @@ match_previous_words(int pattern_id,
 		COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL",
 					  "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE",
 					  "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT",
-					  "ON_ERROR", "LOG_VERBOSITY");
+					  "ON_ERROR", "SET_TO_NULL", "LOG_VERBOSITY");
 
 	/* Complete COPY <sth> FROM|TO filename WITH (FORMAT */
 	else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT"))
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 4002a7f5382..051ca12d107 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -38,6 +38,7 @@ typedef enum CopyOnErrorChoice
 {
 	COPY_ON_ERROR_STOP = 0,		/* immediately throw errors, default */
 	COPY_ON_ERROR_IGNORE,		/* ignore errors */
+	COPY_ON_ERROR_NULL,			/* set error field to null */
 } CopyOnErrorChoice;
 
 /*
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 64ea33aeae8..d9d64082478 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -81,6 +81,10 @@ COPY x from stdin (on_error ignore, on_error ignore);
 ERROR:  conflicting or redundant options
 LINE 1: COPY x from stdin (on_error ignore, on_error ignore);
                                             ^
+COPY x from stdin (on_error set_to_null, on_error set_to_null);
+ERROR:  conflicting or redundant options
+LINE 1: COPY x from stdin (on_error set_to_null, on_error set_to_nul...
+                                                 ^
 COPY x from stdin (log_verbosity default, log_verbosity verbose);
 ERROR:  conflicting or redundant options
 LINE 1: COPY x from stdin (log_verbosity default, log_verbosity verb...
@@ -92,6 +96,8 @@ COPY x from stdin (format BINARY, null 'x');
 ERROR:  cannot specify NULL in BINARY mode
 COPY x from stdin (format BINARY, on_error ignore);
 ERROR:  only ON_ERROR STOP is allowed in BINARY mode
+COPY x from stdin (format BINARY, on_error set_to_null);
+ERROR:  only ON_ERROR STOP is allowed in BINARY mode
 COPY x from stdin (on_error unsupported);
 ERROR:  COPY ON_ERROR "unsupported" not recognized
 LINE 1: COPY x from stdin (on_error unsupported);
@@ -124,12 +130,16 @@ COPY x to stdout (format BINARY, on_error unsupported);
 ERROR:  COPY ON_ERROR cannot be used with COPY TO
 LINE 1: COPY x to stdout (format BINARY, on_error unsupported);
                                          ^
+COPY x to stdin (on_error set_to_null);
+ERROR:  COPY ON_ERROR cannot be used with COPY TO
+LINE 1: COPY x to stdin (on_error set_to_null);
+                         ^
 COPY x from stdin (log_verbosity unsupported);
 ERROR:  COPY LOG_VERBOSITY "unsupported" not recognized
 LINE 1: COPY x from stdin (log_verbosity unsupported);
                            ^
 COPY x from stdin with (reject_limit 1);
-ERROR:  COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
+ERROR:  COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE or SET_TO_NULL
 COPY x from stdin with (on_error ignore, reject_limit 0);
 ERROR:  REJECT_LIMIT (0) must be greater than zero
 -- too many columns in column list: should fail
@@ -769,6 +779,50 @@ CONTEXT:  COPY check_ign_err
 NOTICE:  skipping row due to data type incompatibility at line 8 for column "k": "a"
 CONTEXT:  COPY check_ign_err
 NOTICE:  6 rows were skipped due to data type incompatibility
+CREATE DOMAIN d_int_not_null AS INT NOT NULL CHECK(value > 0);
+CREATE DOMAIN d_int_positive_maybe_null AS INT CHECK(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, b d_int_positive_maybe_null, c INT);
+\pset null NULL
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+NOTICE:  Erroneous values in 1 row were replaced with NULL
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+NOTICE:  Erroneous values in 1 row were replaced with NULL
+-- check inserted content
+TABLE t_on_error_null;
+ a  |  b   |  c   
+----+------+------
+ 11 | NULL |   12
+  1 |   11 | NULL
+(2 rows)
+
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+ERROR:  failed to replace column containing erroneous data with null
+HINT:  Consider specifying the REJECT LIMIT option to skip erroneous rows.
+CONTEXT:  COPY t_on_error_null, line 1, column a: "a"
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+ERROR:  failed to replace column containing erroneous data with null
+HINT:  Consider specifying the REJECT LIMIT option to skip erroneous rows.
+CONTEXT:  COPY t_on_error_null, line 1, column a: "-1"
+--ok. Check interaction with REJECT LIMIT
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null, reject_limit 2);
+NOTICE:  Erroneous values in 2 rows were replaced with NULL
+-- check inserted content
+TABLE t_on_error_null;
+ a  |  b   |  c   
+----+------+------
+ 11 | NULL |   12
+  1 |   11 | NULL
+  1 |   11 |   14
+(3 rows)
+
+--fail. Check interaction with REJECT LIMIT
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null, reject_limit 2);
+ERROR:  encountered more than REJECT_LIMIT (2) rows with data type incompatibility
+CONTEXT:  COPY t_on_error_null, line 3, column a: null input
 -- tests for on_error option with log_verbosity and null constraint via domain
 CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
 CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -776,6 +830,26 @@ COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose);
 NOTICE:  skipping row due to data type incompatibility at line 2 for column "l": null input
 CONTEXT:  COPY check_ign_err2
 NOTICE:  1 row was skipped due to data type incompatibility
+-- check null substitution massages.
+COPY check_ign_err2 FROM STDIN WITH (on_error set_to_null, log_verbosity verbose);
+NOTICE:  failed to replace row attribute "l" with NULL at line 1.
+CONTEXT:  COPY check_ign_err2, line 1, column l: "'foooooooooooooooo'"
+ERROR:  failed to replace column containing erroneous data with null
+HINT:  Consider specifying the REJECT LIMIT option to skip erroneous rows.
+CONTEXT:  COPY check_ign_err2, line 1, column l: "'foooooooooooooooo'"
+COPY check_ign_err2 FROM STDIN WITH (on_error set_to_null, reject_limit 2, log_verbosity verbose);
+NOTICE:  failed to replace row attribute "l" with NULL at line 1.
+CONTEXT:  COPY check_ign_err2, line 1, column l: "'foooooooooooooooo'"
+NOTICE:  skipping row due to data type incompatibility at line 1 for column "l": "'foooooooooooooooo'"
+CONTEXT:  COPY check_ign_err2
+NOTICE:  failed to replace row attribute "l" with NULL at line 2.
+CONTEXT:  COPY check_ign_err2, line 2, column l: "'foooooooooooooooo'"
+NOTICE:  skipping row due to data type incompatibility at line 2 for column "l": "'foooooooooooooooo'"
+CONTEXT:  COPY check_ign_err2
+NOTICE:  failed to replace row attribute "l" with NULL at line 3.
+CONTEXT:  COPY check_ign_err2, line 3, column l: "'foooooooooooooooo'"
+ERROR:  encountered more than REJECT_LIMIT (2) rows with data type incompatibility
+CONTEXT:  COPY check_ign_err2, line 3, column l: "'foooooooooooooooo'"
 COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity silent);
 -- reset context choice
 \set SHOW_CONTEXT errors
@@ -813,6 +887,28 @@ ERROR:  skipped more than REJECT_LIMIT (3) rows due to data type incompatibility
 CONTEXT:  COPY check_ign_err, line 5, column n: ""
 COPY check_ign_err FROM STDIN WITH (on_error ignore, reject_limit 4);
 NOTICE:  4 rows were skipped due to data type incompatibility
+-- tests for on_error set_to_null option
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+NOTICE:  Erroneous values in 3 rows were replaced with NULL
+SELECT * FROM check_ign_err;
+  n   |  m  |  k   
+------+-----+------
+    1 | {1} | NULL
+    2 | {2} |    1
+    3 | {3} |    2
+    4 | {4} | NULL
+ NULL | {5} | NULL
+(5 rows)
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  missing data for column "k"
+CONTEXT:  COPY check_ign_err, line 1, column m: ""
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  extra data after last expected column
+CONTEXT:  COPY check_ign_err, line 1: "1,{1},1,1"
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
@@ -828,6 +924,8 @@ DROP VIEW instead_of_insert_tbl_view;
 DROP VIEW instead_of_insert_tbl_view_2;
 DROP FUNCTION fun_instead_of_insert_tbl();
 DROP TABLE check_ign_err;
+DROP TABLE t_on_error_null;
+DROP DOMAIN d_int_not_null;
 DROP TABLE check_ign_err2;
 DROP DOMAIN dcheck_ign_err2;
 DROP TABLE hard_err;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 45273557ce0..d1dd61b4bf2 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -67,12 +67,14 @@ COPY x from stdin (force_null (a), force_null (b));
 COPY x from stdin (convert_selectively (a), convert_selectively (b));
 COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii');
 COPY x from stdin (on_error ignore, on_error ignore);
+COPY x from stdin (on_error set_to_null, on_error set_to_null);
 COPY x from stdin (log_verbosity default, log_verbosity verbose);
 
 -- incorrect options
 COPY x from stdin (format BINARY, delimiter ',');
 COPY x from stdin (format BINARY, null 'x');
 COPY x from stdin (format BINARY, on_error ignore);
+COPY x from stdin (format BINARY, on_error set_to_null);
 COPY x from stdin (on_error unsupported);
 COPY x from stdin (format TEXT, force_quote(a));
 COPY x from stdin (format TEXT, force_quote *);
@@ -87,6 +89,7 @@ COPY x from stdin (format TEXT, force_null *);
 COPY x to stdout (format CSV, force_null(a));
 COPY x to stdout (format CSV, force_null *);
 COPY x to stdout (format BINARY, on_error unsupported);
+COPY x to stdin (on_error set_to_null);
 COPY x from stdin (log_verbosity unsupported);
 COPY x from stdin with (reject_limit 1);
 COPY x from stdin with (on_error ignore, reject_limit 0);
@@ -534,6 +537,52 @@ a	{2}	2
 8	{8}	8
 \.
 
+CREATE DOMAIN d_int_not_null AS INT NOT NULL CHECK(value > 0);
+CREATE DOMAIN d_int_positive_maybe_null AS INT CHECK(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, b d_int_positive_maybe_null, c INT);
+
+\pset null NULL
+
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+11	a	12
+\.
+
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+1	11	d
+\.
+
+-- check inserted content
+TABLE t_on_error_null;
+
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+a	11	14
+\.
+
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+-1	11	13
+\.
+
+--ok. Check interaction with REJECT LIMIT
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null, reject_limit 2);
+-1	11	13
+a	11	14
+1	11	14
+\.
+
+-- check inserted content
+TABLE t_on_error_null;
+
+--fail. Check interaction with REJECT LIMIT
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null, reject_limit 2);
+-1	11	13
+a	11	14
+\N	11	14
+\.
+
 -- tests for on_error option with log_verbosity and null constraint via domain
 CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
 CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -541,6 +590,15 @@ COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose);
 1	{1}	1	'foo'
 2	{2}	2	\N
 \.
+-- check null substitution massages.
+COPY check_ign_err2 FROM STDIN WITH (on_error set_to_null, log_verbosity verbose);
+2	{2}	2	'foooooooooooooooo'
+\.
+COPY check_ign_err2 FROM STDIN WITH (on_error set_to_null, reject_limit 2, log_verbosity verbose);
+2	{2}	2	'foooooooooooooooo'
+2	{2}	2	'foooooooooooooooo'
+2	{2}	2	'foooooooooooooooo'
+\.
 COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity silent);
 3	{3}	3	'bar'
 4	{4}	4	\N
@@ -588,6 +646,28 @@ a	{7}	7
 10	{10}	10
 \.
 
+-- tests for on_error set_to_null option
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+1	{1}	a
+2	{2}	1
+3	{3}	2
+4	{4}	b
+a	{5}	c
+\.
+
+SELECT * FROM check_ign_err;
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,
+\.
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,{1},1,1
+\.
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
@@ -603,6 +683,8 @@ DROP VIEW instead_of_insert_tbl_view;
 DROP VIEW instead_of_insert_tbl_view_2;
 DROP FUNCTION fun_instead_of_insert_tbl();
 DROP TABLE check_ign_err;
+DROP TABLE t_on_error_null;
+DROP DOMAIN d_int_not_null;
 DROP TABLE check_ign_err2;
 DROP DOMAIN dcheck_ign_err2;
 DROP TABLE hard_err;
-- 
2.34.1



^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-11-19 08:52  jian he <[email protected]>
  parent: Kirill Reshke <[email protected]>
  0 siblings, 1 reply; 29+ messages in thread

From: jian he @ 2024-11-19 08:52 UTC (permalink / raw)
  To: Kirill Reshke <[email protected]>; +Cc: Fujii Masao <[email protected]>; Jim Jones <[email protected]>; David G. Johnston <[email protected]>; Yugo NAGATA <[email protected]>; torikoshia <[email protected]>; PostgreSQL Hackers <[email protected]>

On Sat, Nov 16, 2024 at 5:55 PM Kirill Reshke <[email protected]> wrote:
>
> I am attaching my v8 for reference.
>

in your v8.

   <varlistentry>
    <term><literal>REJECT_LIMIT</literal></term>
    <listitem>
     <para>
      Specifies the maximum number of errors tolerated while converting a
      column's input value to its data type, when <literal>ON_ERROR</literal> is
      set to <literal>ignore</literal>.
      If the input contains more erroneous rows than the specified
value, the <command>COPY</command>
      command fails, even with <literal>ON_ERROR</literal> set to
<literal>ignore</literal>.
     </para>
    </listitem>
   </varlistentry>

then above description not meet with following example, (i think)

create table t(a int not null);
COPY t FROM STDIN WITH (on_error set_to_null, reject_limit 2);
Enter data to be copied followed by a newline.
End with a backslash and a period on a line by itself, or an EOF signal.
>> a
>> \.
ERROR:  null value in column "a" of relation "t" violates not-null constraint
DETAIL:  Failing row contains (null).
CONTEXT:  COPY t, line 1, column a: "a"

Overall, I think
making the domain not-null align with column level not-null would be a
good thing.


     <para>
      Specifies how to behave when encountering an error converting a column's
      input value into its data type.
      An <replaceable class="parameter">error_action</replaceable> value of
      <literal>stop</literal> means fail the command,
      <literal>ignore</literal> means discard the input row and
continue with the next one, and
      <literal>set_to_null</literal> means replace columns containing
erroneous input values with <literal>null</literal> and move to the
next row.

"and move to the next row" is wrong?
I think it should be " and move to the next field".






^ permalink  raw  reply  [nested|flat] 29+ messages in thread

* Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row
@ 2024-11-19 20:02  Kirill Reshke <[email protected]>
  parent: jian he <[email protected]>
  0 siblings, 0 replies; 29+ messages in thread

From: Kirill Reshke @ 2024-11-19 20:02 UTC (permalink / raw)
  To: jian he <[email protected]>; +Cc: Fujii Masao <[email protected]>; Jim Jones <[email protected]>; David G. Johnston <[email protected]>; Yugo NAGATA <[email protected]>; torikoshia <[email protected]>; PostgreSQL Hackers <[email protected]>

On Tue, 19 Nov 2024, 13:52 jian he, <[email protected]> wrote:
>
> On Sat, Nov 16, 2024 at 5:55 PM Kirill Reshke <[email protected]> wrote:
> >
> > I am attaching my v8 for reference.
> >
>
> in your v8.
>
>    <varlistentry>
>     <term><literal>REJECT_LIMIT</literal></term>
>     <listitem>
>      <para>
>       Specifies the maximum number of errors tolerated while converting a
>       column's input value to its data type, when <literal>ON_ERROR</literal> is
>       set to <literal>ignore</literal>.
>       If the input contains more erroneous rows than the specified
> value, the <command>COPY</command>
>       command fails, even with <literal>ON_ERROR</literal> set to
> <literal>ignore</literal>.
>      </para>
>     </listitem>
>    </varlistentry>
>
> then above description not meet with following example, (i think)
>
> create table t(a int not null);
> COPY t FROM STDIN WITH (on_error set_to_null, reject_limit 2);
> Enter data to be copied followed by a newline.
> End with a backslash and a period on a line by itself, or an EOF signal.
> >> a
> >> \.
> ERROR:  null value in column "a" of relation "t" violates not-null constraint
> DETAIL:  Failing row contains (null).
> CONTEXT:  COPY t, line 1, column a: "a"


Sure, my v8 does not helps with column level NOT NULL constraint (or
other constraint)

> Overall, I think
> making the domain not-null align with column level not-null would be a
> good thing.

While this looks sane, it's actually a separate topic. Even on current
HEAD we have domain not-null vs column level not-null unalignment.

consider this example


```
reshke=# create table ftt2 (i int not null);
CREATE TABLE
reshke=# copy ftt2 from stdin with (reject_limit 1000, on_error ignore);
Enter data to be copied followed by a newline.
End with a backslash and a period on a line by itself, or an EOF signal.
>> \N
>> \.
ERROR:  null value in column "i" of relation "ftt2" violates not-null constraint
DETAIL:  Failing row contains (null).
CONTEXT:  COPY ftt2, line 1: "\N"
reshke=# create domain dd as int not null ;
CREATE DOMAIN
reshke=# create table ftt3(i dd);
CREATE TABLE
reshke=# copy ftt3 from stdin with (reject_limit 1000, on_error ignore);
Enter data to be copied followed by a newline.
End with a backslash and a period on a line by itself, or an EOF signal.
>> \N
>> \.
NOTICE:  1 row was skipped due to data type incompatibility
COPY 0
reshke=#
```
So, if we want this, we need to start another thread and deal with
REJECT_LIMIT + on_error ignore first.

The ExecConstraints function is the source of the error in scenario 1.
Therefore, we require something like "ExecConstraintsSafe" to
accommodate the aforementioned. That is a significant change. Not sure
it will be accepted by the community.

>
>      <para>
>       Specifies how to behave when encountering an error converting a column's
>       input value into its data type.
>       An <replaceable class="parameter">error_action</replaceable> value of
>       <literal>stop</literal> means fail the command,
>       <literal>ignore</literal> means discard the input row and
> continue with the next one, and
>       <literal>set_to_null</literal> means replace columns containing
> erroneous input values with <literal>null</literal> and move to the
> next row.
>
> "and move to the next row" is wrong?
> I think it should be " and move to the next field".

Yes, "and move to the next field" is correct.






^ permalink  raw  reply  [nested|flat] 29+ messages in thread


end of thread, other threads:[~2024-11-19 20:02 UTC | newest]

Thread overview: 29+ messages (download: mbox mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2024-02-03 06:22 Re: Change COPY ... ON_ERROR ignore to ON_ERROR ignore_row jian he <[email protected]>
2024-02-05 02:28 ` torikoshia <[email protected]>
2024-02-05 06:26   ` jian he <[email protected]>
2024-02-06 10:19     ` Yugo NAGATA <[email protected]>
2024-02-12 00:00       ` jian he <[email protected]>
2024-02-16 20:16         ` Jim Jones <[email protected]>
2024-02-16 20:31           ` David G. Johnston <[email protected]>
2024-02-16 23:05             ` Jim Jones <[email protected]>
2024-08-26 00:00               ` jian he <[email protected]>
2024-09-09 14:33                 ` Jim Jones <[email protected]>
2024-09-12 10:13                   ` jian he <[email protected]>
2024-09-16 10:38                     ` Jim Jones <[email protected]>
2024-10-21 09:30                     ` Kirill Reshke <[email protected]>
2024-10-21 12:39                       ` Fujii Masao <[email protected]>
2024-10-25 21:03                         ` Kirill Reshke <[email protected]>
2024-11-07 18:00                           ` Fujii Masao <[email protected]>
2024-11-09 12:55                             ` Kirill Reshke <[email protected]>
2024-11-11 11:11                               ` torikoshia <[email protected]>
2024-11-11 20:27                                 ` Kirill Reshke <[email protected]>
2024-11-12 05:03                                   ` Yugo Nagata <[email protected]>
2024-11-16 08:26                               ` jian he <[email protected]>
2024-11-16 09:55                                 ` Kirill Reshke <[email protected]>
2024-11-19 08:52                                   ` jian he <[email protected]>
2024-11-19 20:02                                     ` Kirill Reshke <[email protected]>
2024-10-27 11:32                         ` jian he <[email protected]>
2024-02-05 08:22   ` Yugo NAGATA <[email protected]>
2024-02-06 00:39     ` Kyotaro Horiguchi <[email protected]>
2024-02-06 07:46       ` Yugo NAGATA <[email protected]>
2024-02-06 08:38         ` jian he <[email protected]>

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox