From 17d9f3765bb74d863e229afed59af7dd923f5379 Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Mon, 1 Apr 2024 19:47:40 +0800
Subject: [PATCH v10 1/2] introduce json format for COPY TO operation.  json
 format is only allowed in COPY TO operation.  also cannot be used with header
 option.

 discussion: https://postgr.es/m/CALvfUkBxTYy5uWPFVwpk_7ii2zgT07t3d-yR_cy4sfrrLU%3Dkcg%40mail.gmail.com
 discussion: https://postgr.es/m/6a04628d-0d53-41d9-9e35-5a8dc302c34c@joeconway.com
---
 doc/src/sgml/ref/copy.sgml         |   5 ++
 src/backend/commands/copy.c        |  13 +++
 src/backend/commands/copyto.c      | 125 ++++++++++++++++++++---------
 src/backend/parser/gram.y          |   8 ++
 src/backend/utils/adt/json.c       |   5 +-
 src/include/commands/copy.h        |   1 +
 src/include/utils/json.h           |   2 +
 src/test/regress/expected/copy.out |  54 +++++++++++++
 src/test/regress/sql/copy.sql      |  38 +++++++++
 9 files changed, 208 insertions(+), 43 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 33ce7c4e..add84dbb 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -218,9 +218,14 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       Selects the data format to be read or written:
       <literal>text</literal>,
       <literal>csv</literal> (Comma Separated Values),
+      <literal>json</literal> (JavaScript Object Notation),
       or <literal>binary</literal>.
       The default is <literal>text</literal>.
      </para>
+     <para>
+      The <literal>json</literal> option is allowed only in
+      <command>COPY TO</command>.
+     </para>
     </listitem>
    </varlistentry>
 
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index f75e1d70..02f16d9e 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -497,6 +497,8 @@ ProcessCopyOptions(ParseState *pstate,
 				 /* default format */ ;
 			else if (strcmp(fmt, "csv") == 0)
 				opts_out->csv_mode = true;
+			else if (strcmp(fmt, "json") == 0)
+				opts_out->json_mode = true;
 			else if (strcmp(fmt, "binary") == 0)
 				opts_out->binary = true;
 			else
@@ -740,6 +742,11 @@ ProcessCopyOptions(ParseState *pstate,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("cannot specify HEADER in BINARY mode")));
 
+	if (opts_out->json_mode && opts_out->header_line)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot specify HEADER in JSON mode")));
+
 	/* Check quote */
 	if (!opts_out->csv_mode && opts_out->quote != NULL)
 		ereport(ERROR,
@@ -817,6 +824,12 @@ ProcessCopyOptions(ParseState *pstate,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("COPY FREEZE cannot be used with COPY TO")));
 
+	/* Check json format  */
+	if (opts_out->json_mode && is_from)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot use JSON mode in COPY FROM")));
+
 	if (opts_out->default_print)
 	{
 		if (!is_from)
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index ae8b2e36..fe2eb244 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -24,6 +24,7 @@
 #include "executor/execdesc.h"
 #include "executor/executor.h"
 #include "executor/tuptable.h"
+#include "funcapi.h"
 #include "libpq/libpq.h"
 #include "libpq/pqformat.h"
 #include "mb/pg_wchar.h"
@@ -31,6 +32,7 @@
 #include "pgstat.h"
 #include "storage/fd.h"
 #include "tcop/tcopprot.h"
+#include "utils/json.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
@@ -139,9 +141,20 @@ SendCopyBegin(CopyToState cstate)
 
 	pq_beginmessage(&buf, PqMsg_CopyOutResponse);
 	pq_sendbyte(&buf, format);	/* overall format */
-	pq_sendint16(&buf, natts);
-	for (i = 0; i < natts; i++)
-		pq_sendint16(&buf, format); /* per-column formats */
+	if (!cstate->opts.json_mode)
+	{
+		pq_sendint16(&buf, natts);
+		for (i = 0; i < natts; i++)
+			pq_sendint16(&buf, format); /* per-column formats */
+	}
+	else
+	{
+		/*
+		 * JSON mode is always one non-binary column
+		 */
+		pq_sendint16(&buf, 1);
+		pq_sendint16(&buf, 0);
+	}
 	pq_endmessage(&buf);
 	cstate->copy_dest = COPY_FRONTEND;
 }
@@ -901,11 +914,7 @@ DoCopyTo(CopyToState cstate)
 static void
 CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
 {
-	bool		need_delim = false;
-	FmgrInfo   *out_functions = cstate->out_functions;
 	MemoryContext oldcontext;
-	ListCell   *cur;
-	char	   *string;
 
 	MemoryContextReset(cstate->rowcontext);
 	oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
@@ -916,53 +925,89 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
 		CopySendInt16(cstate, list_length(cstate->attnumlist));
 	}
 
-	/* Make sure the tuple is fully deconstructed */
-	slot_getallattrs(slot);
-
-	foreach(cur, cstate->attnumlist)
+	if (!cstate->opts.json_mode)
 	{
-		int			attnum = lfirst_int(cur);
-		Datum		value = slot->tts_values[attnum - 1];
-		bool		isnull = slot->tts_isnull[attnum - 1];
+		bool		need_delim = false;
+		FmgrInfo   *out_functions = cstate->out_functions;
+		ListCell   *cur;
+		char	   *string;
 
-		if (!cstate->opts.binary)
-		{
-			if (need_delim)
-				CopySendChar(cstate, cstate->opts.delim[0]);
-			need_delim = true;
-		}
+		/* Make sure the tuple is fully deconstructed */
+		slot_getallattrs(slot);
 
-		if (isnull)
-		{
-			if (!cstate->opts.binary)
-				CopySendString(cstate, cstate->opts.null_print_client);
-			else
-				CopySendInt32(cstate, -1);
-		}
-		else
+		foreach(cur, cstate->attnumlist)
 		{
+			int			attnum = lfirst_int(cur);
+			Datum		value = slot->tts_values[attnum - 1];
+			bool		isnull = slot->tts_isnull[attnum - 1];
+
 			if (!cstate->opts.binary)
 			{
-				string = OutputFunctionCall(&out_functions[attnum - 1],
-											value);
-				if (cstate->opts.csv_mode)
-					CopyAttributeOutCSV(cstate, string,
-										cstate->opts.force_quote_flags[attnum - 1]);
+				if (need_delim)
+					CopySendChar(cstate, cstate->opts.delim[0]);
+				need_delim = true;
+			}
+
+			if (isnull)
+			{
+				if (!cstate->opts.binary)
+					CopySendString(cstate, cstate->opts.null_print_client);
 				else
-					CopyAttributeOutText(cstate, string);
+					CopySendInt32(cstate, -1);
 			}
 			else
 			{
-				bytea	   *outputbytes;
+				if (!cstate->opts.binary)
+				{
+					string = OutputFunctionCall(&out_functions[attnum - 1],
+												value);
+					if (cstate->opts.csv_mode)
+						CopyAttributeOutCSV(cstate, string,
+											cstate->opts.force_quote_flags[attnum - 1]);
+					else
+						CopyAttributeOutText(cstate, string);
+				}
+				else
+				{
+					bytea	   *outputbytes;
 
-				outputbytes = SendFunctionCall(&out_functions[attnum - 1],
-											   value);
-				CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
-				CopySendData(cstate, VARDATA(outputbytes),
-							 VARSIZE(outputbytes) - VARHDRSZ);
+					outputbytes = SendFunctionCall(&out_functions[attnum - 1],
+												   value);
+					CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
+					CopySendData(cstate, VARDATA(outputbytes),
+								 VARSIZE(outputbytes) - VARHDRSZ);
+				}
 			}
 		}
 	}
+	else
+	{
+		Datum		rowdata;
+		StringInfo	result;
+
+		/*
+		 * if the COPY TO command's source data is from a query, not a table,
+		 * then we need to copy the TupleDesc from the cstate->queryDesc.
+		 * because returning slot's TupleDesc may change during query execution,
+		 * but composite_to_json requires correct TupleDesc.
+		 * so we need copy from cstate->queryDesc to make it bullet proof.
+		*/
+		if(!cstate->rel)
+		{
+			for (int i = 0; i < slot->tts_tupleDescriptor->natts; i++)
+			{
+				memcpy(TupleDescAttr(slot->tts_tupleDescriptor, i),
+					   TupleDescAttr(cstate->queryDesc->tupDesc, i),
+					   1 * sizeof(FormData_pg_attribute));
+			}
+			BlessTupleDesc(slot->tts_tupleDescriptor);
+		}
+		rowdata = ExecFetchSlotHeapTupleDatum(slot);
+		result = makeStringInfo();
+		composite_to_json(rowdata, result, false);
+
+		CopySendData(cstate, result->data, result->len);
+	}
 
 	CopySendEndOfRow(cstate);
 
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index f1af6147..d5ccdf0b 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -3442,6 +3442,10 @@ copy_opt_item:
 				{
 					$$ = makeDefElem("format", (Node *) makeString("csv"), @1);
 				}
+			| JSON
+				{
+					$$ = makeDefElem("format", (Node *) makeString("json"), @1);
+				}
 			| HEADER_P
 				{
 					$$ = makeDefElem("header", (Node *) makeBoolean(true), @1);
@@ -3524,6 +3528,10 @@ copy_generic_opt_elem:
 				{
 					$$ = makeDefElem($1, $2, @1);
 				}
+			| FORMAT_LA copy_generic_opt_arg
+			{
+				$$ = makeDefElem("format", $2, @1);
+			}
 		;
 
 copy_generic_opt_arg:
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index d719a61f..fabd4e61 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -83,8 +83,6 @@ typedef struct JsonAggState
 	JsonUniqueBuilderState unique_check;
 } JsonAggState;
 
-static void composite_to_json(Datum composite, StringInfo result,
-							  bool use_line_feeds);
 static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
 							  Datum *vals, bool *nulls, int *valcount,
 							  JsonTypeCategory tcategory, Oid outfuncoid,
@@ -507,8 +505,9 @@ array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
 
 /*
  * Turn a composite / record into JSON.
+ * Exported so COPY TO can use it.
  */
-static void
+void
 composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
 {
 	HeapTupleHeader td;
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 141fd48d..ff6ecc7a 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -62,6 +62,7 @@ typedef struct CopyFormatOptions
 	bool		binary;			/* binary format? */
 	bool		freeze;			/* freeze rows on loading? */
 	bool		csv_mode;		/* Comma Separated Value format? */
+	bool		json_mode;		/* JSON format? */
 	CopyHeaderChoice header_line;	/* header line? */
 	char	   *null_print;		/* NULL marker string (server encoding!) */
 	int			null_print_len; /* length of same */
diff --git a/src/include/utils/json.h b/src/include/utils/json.h
index 6d7f1b38..d5631171 100644
--- a/src/include/utils/json.h
+++ b/src/include/utils/json.h
@@ -17,6 +17,8 @@
 #include "lib/stringinfo.h"
 
 /* functions in json.c */
+extern void composite_to_json(Datum composite, StringInfo result,
+							  bool use_line_feeds);
 extern void escape_json(StringInfo buf, const char *str);
 extern char *JsonEncodeDateTime(char *buf, Datum value, Oid typid,
 								const int *tzp);
diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out
index b48365ec..0c5ade47 100644
--- a/src/test/regress/expected/copy.out
+++ b/src/test/regress/expected/copy.out
@@ -42,6 +42,60 @@ copy copytest3 to stdout csv header;
 c1,"col with , comma","col with "" quote"
 1,a,1
 2,b,2
+--- test copying in JSON mode with various styles
+copy copytest to stdout json;
+{"style":"DOS","test":"abc\r\ndef","filler":1}
+{"style":"Unix","test":"abc\ndef","filler":2}
+{"style":"Mac","test":"abc\rdef","filler":3}
+{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4}
+copy copytest to stdout (format json);
+{"style":"DOS","test":"abc\r\ndef","filler":1}
+{"style":"Unix","test":"abc\ndef","filler":2}
+{"style":"Mac","test":"abc\rdef","filler":3}
+{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4}
+-- Error
+copy copytest to stdout (format json, header);
+ERROR:  cannot specify HEADER in JSON mode
+-- Error
+copy copytest from stdout (format json);
+ERROR:  cannot use JSON mode in COPY FROM
+-- embedded escaped characters
+create temp table copyjsontest (
+    id bigserial,
+    f1 text,
+    f2 timestamptz);
+insert into copyjsontest
+  select g.i,
+         CASE WHEN g.i % 2 = 0 THEN
+           'line with '' in it: ' || g.i::text
+         ELSE
+           'line with " in it: ' || g.i::text
+         END,
+         'Mon Feb 10 17:32:01 1997 PST'
+  from generate_series(1,5) as g(i);
+insert into copyjsontest (f1) values
+(E'aaa\"bbb'::text),
+(E'aaa\\bbb'::text),
+(E'aaa\/bbb'::text),
+(E'aaa\bbbb'::text),
+(E'aaa\fbbb'::text),
+(E'aaa\nbbb'::text),
+(E'aaa\rbbb'::text),
+(E'aaa\tbbb'::text);
+copy copyjsontest to stdout json;
+{"id":1,"f1":"line with \" in it: 1","f2":"1997-02-10T17:32:01-08:00"}
+{"id":2,"f1":"line with ' in it: 2","f2":"1997-02-10T17:32:01-08:00"}
+{"id":3,"f1":"line with \" in it: 3","f2":"1997-02-10T17:32:01-08:00"}
+{"id":4,"f1":"line with ' in it: 4","f2":"1997-02-10T17:32:01-08:00"}
+{"id":5,"f1":"line with \" in it: 5","f2":"1997-02-10T17:32:01-08:00"}
+{"id":1,"f1":"aaa\"bbb","f2":null}
+{"id":2,"f1":"aaa\\bbb","f2":null}
+{"id":3,"f1":"aaa/bbb","f2":null}
+{"id":4,"f1":"aaa\bbbb","f2":null}
+{"id":5,"f1":"aaa\fbbb","f2":null}
+{"id":6,"f1":"aaa\nbbb","f2":null}
+{"id":7,"f1":"aaa\rbbb","f2":null}
+{"id":8,"f1":"aaa\tbbb","f2":null}
 create temp table copytest4 (
 	c1 int,
 	"colname with tab: 	" text);
diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql
index 43d2e906..da6b0b0a 100644
--- a/src/test/regress/sql/copy.sql
+++ b/src/test/regress/sql/copy.sql
@@ -54,6 +54,44 @@ this is just a line full of junk that would error out if parsed
 
 copy copytest3 to stdout csv header;
 
+--- test copying in JSON mode with various styles
+copy copytest to stdout json;
+
+copy copytest to stdout (format json);
+
+-- Error
+copy copytest to stdout (format json, header);
+-- Error
+copy copytest from stdout (format json);
+
+-- embedded escaped characters
+create temp table copyjsontest (
+    id bigserial,
+    f1 text,
+    f2 timestamptz);
+
+insert into copyjsontest
+  select g.i,
+         CASE WHEN g.i % 2 = 0 THEN
+           'line with '' in it: ' || g.i::text
+         ELSE
+           'line with " in it: ' || g.i::text
+         END,
+         'Mon Feb 10 17:32:01 1997 PST'
+  from generate_series(1,5) as g(i);
+
+insert into copyjsontest (f1) values
+(E'aaa\"bbb'::text),
+(E'aaa\\bbb'::text),
+(E'aaa\/bbb'::text),
+(E'aaa\bbbb'::text),
+(E'aaa\fbbb'::text),
+(E'aaa\nbbb'::text),
+(E'aaa\rbbb'::text),
+(E'aaa\tbbb'::text);
+
+copy copyjsontest to stdout json;
+
 create temp table copytest4 (
 	c1 int,
 	"colname with tab: 	" text);

base-commit: 7aa00f1360e0c6938fdf32d3fbb8b847b6098b88
-- 
2.34.1

