From 365f340a0b733a3d9b5fdf540a2623c3ea9d4d8d Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Sun, 8 Mar 2026 23:58:29 +0800
Subject: [PATCH v28 4/4] COPY TO JSON: support column lists

When a column list is specified (e.g. COPY t (a, b) TO ... FORMAT json),
build a projected TupleDesc containing only the selected columns and
form a new tuple per row via heap_form_tuple(), so that composite_to_json()
emits the correct column names and values.

Use HeapTupleGetDatum() directly on the formed tuple rather than
heap_copy_tuple_as_datum(), since heap_form_tuple() already stamps the
datum-length, type-id, and type-mod fields on t_data, avoiding an
unnecessary palloc+memcpy per row.

Add regression tests covering column lists with diverse data types
including json, jsonb, int[], numeric, boolean, timestamp, and text,
exercising various column subsets and NULL handling.

Author: Andrew Dunstan <andrew@dunslane.net>
Reviewed-by: jian he <jian.universality@gmail.com>

Discussion: https://postgr.es/m/CALvfUkBxTYy5uWPFVwpk_7ii2zgT07t3d-yR_cy4sfrrLU%3Dkcg%40mail.gmail.com
Discussion: https://postgr.es/m/6a04628d-0d53-41d9-9e35-5a8dc302c34c@joeconway.com
---
 src/backend/commands/copyto.c      | 105 ++++++++++++++++++++++++-----
 src/test/regress/expected/copy.out |  73 +++++++++++++++++++-
 src/test/regress/sql/copy.sql      |  40 ++++++++++-
 3 files changed, 197 insertions(+), 21 deletions(-)

diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index 38fbf7d4424..faa8e323f56 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -88,8 +88,13 @@ typedef struct CopyToStateData
 	char	   *filename;		/* filename, or NULL for STDOUT */
 	bool		is_program;		/* is 'filename' a program to popen? */
 	bool		json_row_delim_needed;	/* need delimiter before next row */
-	StringInfo	json_buf;		/* reusable buffer for JSON output, it is
-								 * initliazed in BeginCopyTo  */
+	StringInfo	json_buf;		/* reusable buffer for JSON output,
+								 * initialized in BeginCopyTo */
+	TupleDesc	tupDesc;		/* Descriptor for JSON output; for a column
+								 * list this is a projected descriptor */
+	Datum	   *json_projvalues;	/* pre-allocated projection values, or
+									 * NULL */
+	bool	   *json_projnulls; /* pre-allocated projection nulls, or NULL */
 	copy_data_dest_cb data_dest_cb; /* function for writing data */
 
 	CopyFormatOptions opts;
@@ -357,19 +362,53 @@ CopyToJsonOneRow(CopyToState cstate, TupleTableSlot *slot)
 {
 	Datum		rowdata;
 
-	/*
-	 * composite_to_json() requires a stable TupleDesc. Since the slot's
-	 * descriptor (slot->tts_tupleDescriptor) can change during the execution
-	 * of a SELECT query, we use cstate->queryDesc->tupDesc instead. This
-	 * precaution is only necessary when the output slot's TupleDesc is of
-	 * type RECORDOID.
-	 */
-	if (!cstate->rel && slot->tts_tupleDescriptor->tdtypeid == RECORDOID)
-		slot->tts_tupleDescriptor = cstate->queryDesc->tupDesc;
-
 	resetStringInfo(cstate->json_buf);
 
-	rowdata = ExecFetchSlotHeapTupleDatum(slot);
+	if (cstate->json_projvalues != NULL)
+	{
+		/*
+		 * Column list case: project selected column values into sequential
+		 * positions matching the custom TupleDesc, then form a new tuple.
+		 */
+		HeapTuple	tup;
+		int			i = 0;
+
+		foreach_int(attnum, cstate->attnumlist)
+		{
+			cstate->json_projvalues[i] = slot->tts_values[attnum - 1];
+			cstate->json_projnulls[i] = slot->tts_isnull[attnum - 1];
+			i++;
+		}
+
+		tup = heap_form_tuple(cstate->tupDesc,
+							  cstate->json_projvalues,
+							  cstate->json_projnulls);
+
+		/*
+		 * heap_form_tuple already stamps the datum-length, type-id, and
+		 * type-mod fields on t_data, so we can use it directly as a composite
+		 * Datum without the extra palloc+memcpy that heap_copy_tuple_as_datum
+		 * would do.  Any TOAST pointers in the projected values will be
+		 * detoasted by the per-column output functions called from
+		 * composite_to_json.
+		 */
+		rowdata = HeapTupleGetDatum(tup);
+	}
+	else
+	{
+		/*
+		 * Full table or query without column list.  Ensure the slot uses
+		 * cstate->tupDesc so that the datum is stamped with the right type;
+		 * for queries whose output type is RECORDOID this must be the blessed
+		 * descriptor so that composite_to_json can look it up via
+		 * lookup_rowtype_tupdesc.
+		 */
+		if (!cstate->rel && slot->tts_tupleDescriptor->tdtypeid == RECORDOID)
+			slot->tts_tupleDescriptor = cstate->queryDesc->tupDesc;
+
+		rowdata = ExecFetchSlotHeapTupleDatum(slot);
+	}
+
 	composite_to_json(rowdata, cstate->json_buf, false);
 
 	if (cstate->opts.force_array)
@@ -841,6 +880,7 @@ BeginCopyTo(ParseState *pstate,
 
 		tupDesc = RelationGetDescr(cstate->rel);
 		cstate->partitions = children;
+		cstate->tupDesc = tupDesc;
 	}
 	else
 	{
@@ -978,20 +1018,49 @@ BeginCopyTo(ParseState *pstate,
 
 		tupDesc = cstate->queryDesc->tupDesc;
 		tupDesc = BlessTupleDesc(tupDesc);
+		cstate->tupDesc = tupDesc;
 	}
 
 	/* Generate or convert list of attributes to process */
 	cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
 
-	/* JSON outputs whole rows; a column list doesn't make sense */
+	/* Set up JSON-specific state */
 	if (cstate->opts.format == COPY_FORMAT_JSON)
 	{
 		cstate->json_buf = makeStringInfo();
 
-		if (attnamelist != NIL)
-			ereport(ERROR,
-					errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					errmsg("column selection is not supported in JSON mode"));
+		if (attnamelist != NIL && rel)
+		{
+			int			natts = list_length(cstate->attnumlist);
+			TupleDesc	resultDesc;
+
+			/*
+			 * Build a TupleDesc describing only the selected columns so that
+			 * composite_to_json() emits the right column names and types.
+			 */
+			resultDesc = CreateTemplateTupleDesc(natts);
+
+			foreach_int(attnum, cstate->attnumlist)
+			{
+				Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
+
+				TupleDescInitEntry(resultDesc,
+								   foreach_current_index(attnum) + 1,
+								   NameStr(attr->attname),
+								   attr->atttypid,
+								   attr->atttypmod,
+								   attr->attndims);
+			}
+
+			cstate->tupDesc = BlessTupleDesc(resultDesc);
+
+			/*
+			 * Pre-allocate arrays for projecting selected column values into
+			 * sequential positions matching the custom TupleDesc.
+			 */
+			cstate->json_projvalues = palloc_array(Datum, natts);
+			cstate->json_projnulls = palloc_array(bool, natts);
+		}
 	}
 
 	num_phys_attrs = tupDesc->natts;
diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out
index e1d51335e33..e44b4a1d79d 100644
--- a/src/test/regress/expected/copy.out
+++ b/src/test/regress/expected/copy.out
@@ -77,6 +77,9 @@ c1,"col with , comma","col with "" quote"
 copy (select 1 union all select 2) to stdout with (format json);
 {"?column?":1}
 {"?column?":2}
+copy (select 1 as foo union all select 2) to stdout with (format json);
+{"foo":1}
+{"foo":2}
 copy (values (1), (2)) TO stdout with (format json);
 {"column1":1}
 {"column1":2}
@@ -134,8 +137,6 @@ copy copytest to stdout (format json, reject_limit 1);
 ERROR:  COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
 copy copytest from stdin(format json);
 ERROR:  COPY JSON mode cannot be used with COPY FROM
-copy copytest (style) to stdout (format json);
-ERROR:  column selection is not supported in JSON mode
 -- all of the above should yield error
 -- should fail: force_array requires json format
 copy copytest to stdout (format csv, force_array true);
@@ -160,6 +161,74 @@ copy copytest to stdout (format json, force_array false);
 {"style":"Unix","test":"abc\ndef","filler":2}
 {"style":"Mac","test":"abc\rdef","filler":3}
 {"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4}
+-- column list with json format
+copy copytest (style, filler) to stdout (format json);
+{"style":"DOS","filler":1}
+{"style":"Unix","filler":2}
+{"style":"Mac","filler":3}
+{"style":"esc\\ape","filler":4}
+copy copytest (style, filler) to stdout (format json,  force_array true);
+[
+ {"style":"DOS","filler":1}
+,{"style":"Unix","filler":2}
+,{"style":"Mac","filler":3}
+,{"style":"esc\\ape","filler":4}
+]
+copy copytest (style, test, filler) to stdout (format json,  force_array true);
+[
+ {"style":"DOS","test":"abc\r\ndef","filler":1}
+,{"style":"Unix","test":"abc\ndef","filler":2}
+,{"style":"Mac","test":"abc\rdef","filler":3}
+,{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4}
+]
+-- column list with diverse data types
+create temp table copyjsontest_types (
+    id int,
+    js json,
+    jsb jsonb,
+    arr int[],
+    n numeric(10,2),
+    b boolean,
+    ts timestamp,
+    t text);
+insert into copyjsontest_types values
+(1, '{"a":1}', '{"b":2}', '{1,2,3}', 3.14, true,
+ '2024-01-15 10:30:00', 'hello'),
+(2, '[1,null,"x"]', '{"nested":{"k":"v"}}', '{4,5}', -99.99, false,
+ '2024-06-30 23:59:59', 'world'),
+(3, 'null', 'null', '{}', null, null, null, null);
+-- full table
+copy copyjsontest_types to stdout (format json);
+{"id":1,"js":{"a":1},"jsb":{"b": 2},"arr":[1,2,3],"n":3.14,"b":true,"ts":"2024-01-15T10:30:00","t":"hello"}
+{"id":2,"js":[1,null,"x"],"jsb":{"nested": {"k": "v"}},"arr":[4,5],"n":-99.99,"b":false,"ts":"2024-06-30T23:59:59","t":"world"}
+{"id":3,"js":null,"jsb":null,"arr":[],"n":null,"b":null,"ts":null,"t":null}
+-- column subsets exercising each type
+copy copyjsontest_types (id, js, jsb) to stdout (format json);
+{"id":1,"js":{"a":1},"jsb":{"b": 2}}
+{"id":2,"js":[1,null,"x"],"jsb":{"nested": {"k": "v"}}}
+{"id":3,"js":null,"jsb":null}
+copy copyjsontest_types (id, arr, n, b) to stdout (format json);
+{"id":1,"arr":[1,2,3],"n":3.14,"b":true}
+{"id":2,"arr":[4,5],"n":-99.99,"b":false}
+{"id":3,"arr":[],"n":null,"b":null}
+copy copyjsontest_types (jsb, t) to stdout (format json);
+{"jsb":{"b": 2},"t":"hello"}
+{"jsb":{"nested": {"k": "v"}},"t":"world"}
+{"jsb":null,"t":null}
+copy copyjsontest_types (id, ts) to stdout (format json);
+{"id":1,"ts":"2024-01-15T10:30:00"}
+{"id":2,"ts":"2024-06-30T23:59:59"}
+{"id":3,"ts":null}
+-- single column: json and jsonb
+copy copyjsontest_types (js) to stdout (format json);
+{"js":{"a":1}}
+{"js":[1,null,"x"]}
+{"js":null}
+copy copyjsontest_types (jsb) to stdout (format json);
+{"jsb":{"b": 2}}
+{"jsb":{"nested": {"k": "v"}}}
+{"jsb":null}
+drop table copyjsontest_types;
 -- embedded escaped characters
 create temp table copyjsontest (
     id bigserial,
diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql
index 764d19f4947..e4e70a82ecc 100644
--- a/src/test/regress/sql/copy.sql
+++ b/src/test/regress/sql/copy.sql
@@ -84,6 +84,7 @@ copy copytest3 to stdout csv header;
 
 --- test copying in JSON mode with various styles
 copy (select 1 union all select 2) to stdout with (format json);
+copy (select 1 as foo union all select 2) to stdout with (format json);
 copy (values (1), (2)) TO stdout with (format json);
 copy (select 1 union all select 2) to stdout with (format json, force_array true);
 copy (values (1), (2)) TO stdout with (format json, force_array true);
@@ -105,7 +106,6 @@ copy copytest to stdout (format json, force_null *);
 copy copytest to stdout (format json, on_error ignore);
 copy copytest to stdout (format json, reject_limit 1);
 copy copytest from stdin(format json);
-copy copytest (style) to stdout (format json);
 -- all of the above should yield error
 
 -- should fail: force_array requires json format
@@ -116,6 +116,44 @@ copy copytest to stdout (format json, force_array);
 copy copytest to stdout (format json, force_array true);
 copy copytest to stdout (format json, force_array false);
 
+-- column list with json format
+copy copytest (style, filler) to stdout (format json);
+copy copytest (style, filler) to stdout (format json,  force_array true);
+copy copytest (style, test, filler) to stdout (format json,  force_array true);
+
+-- column list with diverse data types
+create temp table copyjsontest_types (
+    id int,
+    js json,
+    jsb jsonb,
+    arr int[],
+    n numeric(10,2),
+    b boolean,
+    ts timestamp,
+    t text);
+
+insert into copyjsontest_types values
+(1, '{"a":1}', '{"b":2}', '{1,2,3}', 3.14, true,
+ '2024-01-15 10:30:00', 'hello'),
+(2, '[1,null,"x"]', '{"nested":{"k":"v"}}', '{4,5}', -99.99, false,
+ '2024-06-30 23:59:59', 'world'),
+(3, 'null', 'null', '{}', null, null, null, null);
+
+-- full table
+copy copyjsontest_types to stdout (format json);
+
+-- column subsets exercising each type
+copy copyjsontest_types (id, js, jsb) to stdout (format json);
+copy copyjsontest_types (id, arr, n, b) to stdout (format json);
+copy copyjsontest_types (jsb, t) to stdout (format json);
+copy copyjsontest_types (id, ts) to stdout (format json);
+
+-- single column: json and jsonb
+copy copyjsontest_types (js) to stdout (format json);
+copy copyjsontest_types (jsb) to stdout (format json);
+
+drop table copyjsontest_types;
+
 -- embedded escaped characters
 create temp table copyjsontest (
     id bigserial,
-- 
2.34.1

