From e866e079eae53b998945f98b637ffe24c571762e Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Tue, 19 Nov 2024 12:25:53 +0800
Subject: [PATCH v13 2/3] introduce json format for COPY TO

json format is only allowed in COPY TO operation.
also cannot be used with {header, default, null, delimiter} options.

discussion: https://postgr.es/m/CALvfUkBxTYy5uWPFVwpk_7ii2zgT07t3d-yR_cy4sfrrLU%3Dkcg%40mail.gmail.com
discussion: https://postgr.es/m/6a04628d-0d53-41d9-9e35-5a8dc302c34c@joeconway.com
---
 doc/src/sgml/ref/copy.sgml         | 13 +++++--
 src/backend/commands/copy.c        | 29 +++++++++++++++
 src/backend/commands/copyto.c      | 51 ++++++++++++++++++++++---
 src/backend/parser/gram.y          |  8 ++++
 src/backend/utils/adt/json.c       |  5 +--
 src/bin/psql/tab-complete.in.c     |  2 +-
 src/include/commands/copy.h        |  1 +
 src/include/utils/json.h           |  2 +
 src/test/regress/expected/copy.out | 60 ++++++++++++++++++++++++++++++
 src/test/regress/sql/copy.sql      | 41 ++++++++++++++++++++
 10 files changed, 199 insertions(+), 13 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 8394402f09..5bf0f38d90 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -219,10 +219,15 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       Selects the data format to be read or written:
       <literal>text</literal>,
       <literal>csv</literal> (Comma Separated Values),
+      <literal>json</literal> (JavaScript Object Notation),
       or <literal>binary</literal>.
       The default is <literal>text</literal>.
       See <xref linkend="sql-copy-file-formats"/> below for details.
      </para>
+     <para>
+      The <literal>json</literal> option is allowed only in
+      <command>COPY TO</command>.
+     </para>
     </listitem>
    </varlistentry>
 
@@ -257,7 +262,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       (line) of the file.  The default is a tab character in text format,
       a comma in <literal>CSV</literal> format.
       This must be a single one-byte character.
-      This option is not allowed when using <literal>binary</literal> format.
+      This option is not allowed when using <literal>binary</literal> or <literal>json</literal> format.
      </para>
     </listitem>
    </varlistentry>
@@ -271,7 +276,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       string in <literal>CSV</literal> format. You might prefer an
       empty string even in text format for cases where you don't want to
       distinguish nulls from empty strings.
-      This option is not allowed when using <literal>binary</literal> format.
+      This option is not allowed when using <literal>binary</literal> or <literal>json</literal> format.
      </para>
 
      <note>
@@ -294,7 +299,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       is found in the input file, the default value of the corresponding column
       will be used.
       This option is allowed only in <command>COPY FROM</command>, and only when
-      not using <literal>binary</literal> format.
+      not using <literal>binary</literal> or <literal>json</literal> format.
      </para>
     </listitem>
    </varlistentry>
@@ -310,7 +315,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       If this option is set to <literal>MATCH</literal>, the number and names
       of the columns in the header line must match the actual column names of
       the table, in order;  otherwise an error is raised.
-      This option is not allowed when using <literal>binary</literal> format.
+      This option is not allowed when using <literal>binary</literal> or <literal>json</literal> format.
       The <literal>MATCH</literal> option is only valid for <command>COPY
       FROM</command> commands.
      </para>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index b7e819de40..4b8bc87666 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -516,6 +516,8 @@ ProcessCopyOptions(ParseState *pstate,
 				opts_out->format = COPY_FORMAT_CSV;
 			else if (strcmp(fmt, "binary") == 0)
 				opts_out->format = COPY_FORMAT_BINARY;
+			else if (strcmp(fmt, "json") == 0)
+				opts_out->format = COPY_FORMAT_JSON;
 			else
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -681,16 +683,32 @@ ProcessCopyOptions(ParseState *pstate,
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
 				 errmsg("cannot specify %s in BINARY mode", "DELIMITER")));
 
+	if (opts_out->format == COPY_FORMAT_JSON && opts_out->delim)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
+				 errmsg("cannot specify %s in JSON mode", "DELIMITER")));
+
 	if (opts_out->format == COPY_FORMAT_BINARY && opts_out->null_print)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("cannot specify %s in BINARY mode", "NULL")));
 
+	if (opts_out->format == COPY_FORMAT_JSON && opts_out->null_print)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("cannot specify %s in JSON mode", "NULL")));
+
 	if (opts_out->format == COPY_FORMAT_BINARY && opts_out->default_print)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("cannot specify %s in BINARY mode", "DEFAULT")));
 
+	if (opts_out->format == COPY_FORMAT_JSON && opts_out->default_print)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("cannot specify %s in JSON mode", "DEFAULT")));
+
 	/* Set defaults for omitted options */
 	if (!opts_out->delim)
 		opts_out->delim = opts_out->format == COPY_FORMAT_CSV ? "," : "\t";
@@ -761,6 +779,11 @@ ProcessCopyOptions(ParseState *pstate,
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
 				 errmsg("cannot specify %s in BINARY mode", "HEADER")));
 
+	if (opts_out->format == COPY_FORMAT_JSON && opts_out->header_line)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("cannot specify %s in JSON mode", "HEADER")));
+
 	/* Check quote */
 	if (opts_out->format != COPY_FORMAT_CSV && opts_out->quote != NULL)
 		ereport(ERROR,
@@ -864,6 +887,12 @@ ProcessCopyOptions(ParseState *pstate,
 				 errmsg("COPY %s cannot be used with %s", "FREEZE",
 						"COPY TO")));
 
+	/* Check json format  */
+	if (opts_out->format == COPY_FORMAT_JSON && is_from)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("COPY json mode cannot be used with %s", "COPY FROM")));
+
 	if (opts_out->default_print)
 	{
 		if (!is_from)
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index 03c9d71d34..87709d76be 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -24,6 +24,7 @@
 #include "executor/execdesc.h"
 #include "executor/executor.h"
 #include "executor/tuptable.h"
+#include "funcapi.h"
 #include "libpq/libpq.h"
 #include "libpq/pqformat.h"
 #include "mb/pg_wchar.h"
@@ -31,6 +32,7 @@
 #include "pgstat.h"
 #include "storage/fd.h"
 #include "tcop/tcopprot.h"
+#include "utils/json.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
@@ -139,9 +141,20 @@ SendCopyBegin(CopyToState cstate)
 
 	pq_beginmessage(&buf, PqMsg_CopyOutResponse);
 	pq_sendbyte(&buf, format);	/* overall format */
-	pq_sendint16(&buf, natts);
-	for (i = 0; i < natts; i++)
-		pq_sendint16(&buf, format); /* per-column formats */
+	if (cstate->opts.format != COPY_FORMAT_JSON)
+	{
+		pq_sendint16(&buf, natts);
+		for (i = 0; i < natts; i++)
+			pq_sendint16(&buf, format); /* per-column formats */
+	}
+	else
+	{
+		/*
+		 * JSON format is always one non-binary column
+		 */
+		pq_sendint16(&buf, 1);
+		pq_sendint16(&buf, 0);
+	}
 	pq_endmessage(&buf);
 	cstate->copy_dest = COPY_FRONTEND;
 }
@@ -921,7 +934,7 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
 	/* Make sure the tuple is fully deconstructed */
 	slot_getallattrs(slot);
 
-	if (cstate->opts.format != COPY_FORMAT_BINARY)
+	if (cstate->opts.format == COPY_FORMAT_TEXT || cstate->opts.format == COPY_FORMAT_CSV)
 	{
 		bool		need_delim = false;
 
@@ -949,7 +962,7 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
 			}
 		}
 	}
-	else
+	else if (cstate->opts.format == COPY_FORMAT_BINARY)
 	{
 		foreach_int(attnum, cstate->attnumlist)
 		{
@@ -969,6 +982,34 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
 			}
 		}
 	}
+	else
+	{
+		Datum		rowdata;
+		StringInfo	result;
+
+		/*
+		 * if COPY TO source data is from a query, not a table (copy the_table
+		 * to stdout), then we need copy CopyToState->TupleDesc->attrs to
+		 * slot->tts_tupleDescriptor->attrs because the slot's TupleDesc->attrs
+		 * may change during query execution, but composite_to_json requires
+		 * correct TupleDesc->attrs for constructing the json keys.
+		 * composite_to_json will iterate each TupleDesc->attrs so no need to
+		 * copy other fields in cstate->queryDesc->tupDesc.
+		*/
+		if(!cstate->rel)
+		{
+			memcpy(TupleDescAttr(slot->tts_tupleDescriptor, 0),
+				TupleDescAttr(cstate->queryDesc->tupDesc, 0),
+				cstate->queryDesc->tupDesc->natts * sizeof(FormData_pg_attribute));
+
+			BlessTupleDesc(slot->tts_tupleDescriptor);
+		}
+		rowdata = ExecFetchSlotHeapTupleDatum(slot);
+		result = makeStringInfo();
+		composite_to_json(rowdata, result, false);
+
+		CopySendData(cstate, result->data, result->len);
+	}
 
 	CopySendEndOfRow(cstate);
 
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 67eb96396a..853532cf7d 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -3457,6 +3457,10 @@ copy_opt_item:
 				{
 					$$ = makeDefElem("format", (Node *) makeString("csv"), @1);
 				}
+			| JSON
+				{
+					$$ = makeDefElem("format", (Node *) makeString("json"), @1);
+				}
 			| HEADER_P
 				{
 					$$ = makeDefElem("header", (Node *) makeBoolean(true), @1);
@@ -3539,6 +3543,10 @@ copy_generic_opt_elem:
 				{
 					$$ = makeDefElem($1, $2, @1);
 				}
+			| FORMAT_LA copy_generic_opt_arg
+			{
+				$$ = makeDefElem("format", $2, @1);
+			}
 		;
 
 copy_generic_opt_arg:
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 058aade2af..5de9b86a96 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -85,8 +85,6 @@ typedef struct JsonAggState
 	JsonUniqueBuilderState unique_check;
 } JsonAggState;
 
-static void composite_to_json(Datum composite, StringInfo result,
-							  bool use_line_feeds);
 static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
 							  Datum *vals, bool *nulls, int *valcount,
 							  JsonTypeCategory tcategory, Oid outfuncoid,
@@ -516,8 +514,9 @@ array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
 
 /*
  * Turn a composite / record into JSON.
+ * Exported so COPY TO can use it.
  */
-static void
+void
 composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
 {
 	HeapTupleHeader td;
diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c
index fad2277991..48cf854a1d 100644
--- a/src/bin/psql/tab-complete.in.c
+++ b/src/bin/psql/tab-complete.in.c
@@ -3239,7 +3239,7 @@ match_previous_words(int pattern_id,
 
 	/* Complete COPY <sth> FROM|TO filename WITH (FORMAT */
 	else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT"))
-		COMPLETE_WITH("binary", "csv", "text");
+		COMPLETE_WITH("binary", "csv", "text", "json");
 
 	/* Complete COPY <sth> FROM filename WITH (ON_ERROR */
 	else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "ON_ERROR"))
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index c3d1df267f..076ae59f96 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -59,6 +59,7 @@ typedef enum CopyFormat
 	COPY_FORMAT_TEXT = 0,
 	COPY_FORMAT_BINARY,
 	COPY_FORMAT_CSV,
+	COPY_FORMAT_JSON,
 } CopyFormat;
 
 /*
diff --git a/src/include/utils/json.h b/src/include/utils/json.h
index 79c1062e1b..c904ef6c6e 100644
--- a/src/include/utils/json.h
+++ b/src/include/utils/json.h
@@ -17,6 +17,8 @@
 #include "lib/stringinfo.h"
 
 /* functions in json.c */
+extern void composite_to_json(Datum composite, StringInfo result,
+							  bool use_line_feeds);
 extern void escape_json(StringInfo buf, const char *str);
 extern void escape_json_with_len(StringInfo buf, const char *str, int len);
 extern void escape_json_text(StringInfo buf, const text *txt);
diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out
index f554d42c84..430f11f3f1 100644
--- a/src/test/regress/expected/copy.out
+++ b/src/test/regress/expected/copy.out
@@ -73,6 +73,66 @@ copy copytest3 to stdout csv header;
 c1,"col with , comma","col with "" quote"
 1,a,1
 2,b,2
+--- test copying in JSON mode with various styles
+copy copytest to stdout json;
+{"style":"DOS","test":"abc\r\ndef","filler":1}
+{"style":"Unix","test":"abc\ndef","filler":2}
+{"style":"Mac","test":"abc\rdef","filler":3}
+{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4}
+copy copytest to stdout (format json);
+{"style":"DOS","test":"abc\r\ndef","filler":1}
+{"style":"Unix","test":"abc\ndef","filler":2}
+{"style":"Mac","test":"abc\rdef","filler":3}
+{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4}
+-- all of the following should yield error
+copy copytest to stdout (format json, header);
+ERROR:  cannot specify HEADER in JSON mode
+copy copytest to stdout (format json, null '\N');
+ERROR:  cannot specify NULL in JSON mode
+copy copytest to stdout (format json, delimiter '|');
+ERROR:  cannot specify DELIMITER in JSON mode
+copy copytest to stdout (format json, default '|');
+ERROR:  cannot specify DEFAULT in JSON mode
+copy copytest from stdin(format json);
+ERROR:  COPY json mode cannot be used with COPY FROM
+-- all of the above should yield error
+-- embedded escaped characters
+create temp table copyjsontest (
+    id bigserial,
+    f1 text,
+    f2 timestamptz);
+insert into copyjsontest
+  select g.i,
+         CASE WHEN g.i % 2 = 0 THEN
+           'line with '' in it: ' || g.i::text
+         ELSE
+           'line with " in it: ' || g.i::text
+         END,
+         'Mon Feb 10 17:32:01 1997 PST'
+  from generate_series(1,5) as g(i);
+insert into copyjsontest (f1) values
+(E'aaa\"bbb'::text),
+(E'aaa\\bbb'::text),
+(E'aaa\/bbb'::text),
+(E'aaa\bbbb'::text),
+(E'aaa\fbbb'::text),
+(E'aaa\nbbb'::text),
+(E'aaa\rbbb'::text),
+(E'aaa\tbbb'::text);
+copy copyjsontest to stdout json;
+{"id":1,"f1":"line with \" in it: 1","f2":"1997-02-10T17:32:01-08:00"}
+{"id":2,"f1":"line with ' in it: 2","f2":"1997-02-10T17:32:01-08:00"}
+{"id":3,"f1":"line with \" in it: 3","f2":"1997-02-10T17:32:01-08:00"}
+{"id":4,"f1":"line with ' in it: 4","f2":"1997-02-10T17:32:01-08:00"}
+{"id":5,"f1":"line with \" in it: 5","f2":"1997-02-10T17:32:01-08:00"}
+{"id":1,"f1":"aaa\"bbb","f2":null}
+{"id":2,"f1":"aaa\\bbb","f2":null}
+{"id":3,"f1":"aaa/bbb","f2":null}
+{"id":4,"f1":"aaa\bbbb","f2":null}
+{"id":5,"f1":"aaa\fbbb","f2":null}
+{"id":6,"f1":"aaa\nbbb","f2":null}
+{"id":7,"f1":"aaa\rbbb","f2":null}
+{"id":8,"f1":"aaa\tbbb","f2":null}
 create temp table copytest4 (
 	c1 int,
 	"colname with tab: 	" text);
diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql
index f1699b66b0..3d21f20c98 100644
--- a/src/test/regress/sql/copy.sql
+++ b/src/test/regress/sql/copy.sql
@@ -82,6 +82,47 @@ this is just a line full of junk that would error out if parsed
 
 copy copytest3 to stdout csv header;
 
+--- test copying in JSON mode with various styles
+copy copytest to stdout json;
+
+copy copytest to stdout (format json);
+
+-- all of the following should yield error
+copy copytest to stdout (format json, header);
+copy copytest to stdout (format json, null '\N');
+copy copytest to stdout (format json, delimiter '|');
+copy copytest to stdout (format json, default '|');
+copy copytest from stdin(format json);
+-- all of the above should yield error
+
+-- embedded escaped characters
+create temp table copyjsontest (
+    id bigserial,
+    f1 text,
+    f2 timestamptz);
+
+insert into copyjsontest
+  select g.i,
+         CASE WHEN g.i % 2 = 0 THEN
+           'line with '' in it: ' || g.i::text
+         ELSE
+           'line with " in it: ' || g.i::text
+         END,
+         'Mon Feb 10 17:32:01 1997 PST'
+  from generate_series(1,5) as g(i);
+
+insert into copyjsontest (f1) values
+(E'aaa\"bbb'::text),
+(E'aaa\\bbb'::text),
+(E'aaa\/bbb'::text),
+(E'aaa\bbbb'::text),
+(E'aaa\fbbb'::text),
+(E'aaa\nbbb'::text),
+(E'aaa\rbbb'::text),
+(E'aaa\tbbb'::text);
+
+copy copyjsontest to stdout json;
+
 create temp table copytest4 (
 	c1 int,
 	"colname with tab: 	" text);
-- 
2.34.1

