From d043adcbb2032eb2c1218df99992008838f2e200 Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <andrew@dunslane.net>
Date: Wed, 4 Mar 2026 09:40:10 -0500
Subject: [PATCH v25 4/4] COPY TO JSON: build JSON per-column, support column
 lists

Rework CopyToJsonOneRow to iterate attnumlist and build JSON objects
column-by-column using datum_to_json_append, instead of converting the
whole row via ExecFetchSlotHeapTupleDatum + composite_to_json.

This has several benefits:
- Column lists now work with JSON format (previously rejected)
- Per-column JSON type categorization is done once at startup rather
  than on every row (composite_to_json called json_categorize_type
  per column per row)
- The TupleDesc memcpy/BlessTupleDesc hack for query-based COPY is
  eliminated entirely
- Pre-escaped column key strings avoid repeated escape_json calls

Add the JSON-specific callbacks CopyToJsonStart (initializes json_buf and
pre-computes the escaped key strings) and CopyToJsonOutFunc (calls
json_categorize_type once per column), and add datum_to_json_append to
json.c, exported via jsonfuncs.h, for efficient JSON serialization that
appends directly to a StringInfo.
---
 src/backend/commands/copyto.c      | 159 ++++++++++++++++++-----------
 src/backend/utils/adt/json.c       |  14 +++
 src/include/utils/jsonfuncs.h      |   2 +
 src/test/regress/expected/copy.out |   8 +-
 src/test/regress/sql/copy.sql      |   4 +-
 5 files changed, 126 insertions(+), 61 deletions(-)

diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index a7615cc34ec..9502c910b43 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -26,7 +26,6 @@
 #include "executor/execdesc.h"
 #include "executor/executor.h"
 #include "executor/tuptable.h"
-#include "funcapi.h"
 #include "libpq/libpq.h"
 #include "libpq/pqformat.h"
 #include "mb/pg_wchar.h"
@@ -35,6 +34,7 @@
 #include "storage/fd.h"
 #include "tcop/tcopprot.h"
 #include "utils/json.h"
+#include "utils/jsonfuncs.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
@@ -89,8 +89,10 @@ typedef struct CopyToStateData
 
 	/* JSON format state */
 	bool		json_row_delim_needed;	/* need delimiter before next row */
-	bool		json_tupledesc_ready;	/* TupleDesc setup done for JSON */
 	StringInfoData json_buf;	/* reusable buffer for JSON output */
+	JsonTypeCategory *json_categories;	/* per-column JSON type categories */
+	Oid		   *json_outfuncoids;	/* per-column JSON output func OIDs */
+	char	  **json_col_keys;	/* per-column pre-escaped "key": strings */
 
 	copy_data_dest_cb data_dest_cb; /* function for writing data */
 
@@ -138,6 +140,8 @@ static void CopyToCSVOneRow(CopyToState cstate, TupleTableSlot *slot);
 static void CopyToTextLikeOneRow(CopyToState cstate, TupleTableSlot *slot,
 								 bool is_csv);
 static void CopyToTextLikeEnd(CopyToState cstate);
+static void CopyToJsonStart(CopyToState cstate, TupleDesc tupDesc);
+static void CopyToJsonOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo);
 static void CopyToJsonOneRow(CopyToState cstate, TupleTableSlot *slot);
 static void CopyToJsonEnd(CopyToState cstate);
 static void CopyToBinaryStart(CopyToState cstate, TupleDesc tupDesc);
@@ -160,8 +164,8 @@ static void CopySendInt16(CopyToState cstate, int16 val);
  * COPY TO routines for built-in formats.
  *
  * Text and CSV formats share the same TextLike routines except for the
- * one-row callback.  JSON shares the start and outfunc callbacks with
- * text/CSV, but has its own one-row and end callbacks.
+ * one-row callback.  JSON has its own start, outfunc, one-row, and end
+ * callbacks.
  */
 
 /* text format */
@@ -182,8 +186,8 @@ static const CopyToRoutine CopyToRoutineCSV = {
 
 /* json format */
 static const CopyToRoutine CopyToRoutineJson = {
-	.CopyToStart = CopyToTextLikeStart,
-	.CopyToOutFunc = CopyToTextLikeOutFunc,
+	.CopyToStart = CopyToJsonStart,
+	.CopyToOutFunc = CopyToJsonOutFunc,
 	.CopyToOneRow = CopyToJsonOneRow,
 	.CopyToEnd = CopyToJsonEnd,
 };
@@ -211,7 +215,7 @@ CopyToGetRoutine(const CopyFormatOptions *opts)
 	return &CopyToRoutineText;
 }
 
-/* Implementation of the start callback for text, CSV, and json formats */
+/* Implementation of the start callback for text and CSV formats */
 static void
 CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
 {
@@ -230,8 +234,6 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
 		ListCell   *cur;
 		bool		hdr_delim = false;
 
-		Assert(cstate->opts.format != COPY_FORMAT_JSON);
-
 		foreach(cur, cstate->attnumlist)
 		{
 			int			attnum = lfirst_int(cur);
@@ -251,30 +253,10 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
 
 		CopySendTextLikeEndOfRow(cstate);
 	}
-
-	/* JSON-specific initialization */
-	if (cstate->opts.format == COPY_FORMAT_JSON)
-	{
-		MemoryContext oldcxt;
-
-		/* Allocate reusable JSON output buffer in long-lived context */
-		oldcxt = MemoryContextSwitchTo(cstate->copycontext);
-		initStringInfo(&cstate->json_buf);
-		MemoryContextSwitchTo(oldcxt);
-
-		/*
-		 * If FORCE_ARRAY has been specified, send the opening bracket.
-		 */
-		if (cstate->opts.force_array)
-		{
-			CopySendChar(cstate, '[');
-			CopySendTextLikeEndOfRow(cstate);
-		}
-	}
 }
 
 /*
- * Implementation of the outfunc callback for text, CSV, and json formats. Assign
+ * Implementation of the outfunc callback for text and CSV formats. Assign
  * the output function data to the given *finfo.
  */
 static void
@@ -354,34 +336,95 @@ CopyToTextLikeEnd(CopyToState cstate)
 	/* Nothing to do here */
 }
 
+/*
+ * Implementation of the start callback for json format.
+ *
+ * Set up json_buf and pre-compute each selected column's escaped key string
+ * ('"colname":') so CopyToJsonOneRow only copies it; emit '[' if FORCE_ARRAY.
+ */
+static void
+CopyToJsonStart(CopyToState cstate, TupleDesc tupDesc)
+{
+	MemoryContext oldcxt;
+	StringInfoData keybuf;
+
+	oldcxt = MemoryContextSwitchTo(cstate->copycontext);
+
+	/* Allocate reusable JSON output buffer */
+	initStringInfo(&cstate->json_buf);
+
+	/* Pre-build escaped key strings: "\"colname\":" */
+	cstate->json_col_keys = palloc0(tupDesc->natts * sizeof(char *));
+	initStringInfo(&keybuf);
+	foreach_int(attnum, cstate->attnumlist)
+	{
+		char	   *colname;
+
+		colname = NameStr(TupleDescAttr(tupDesc, attnum - 1)->attname);
+
+		resetStringInfo(&keybuf);
+		escape_json(&keybuf, colname);
+		appendStringInfoChar(&keybuf, ':');
+
+		cstate->json_col_keys[attnum - 1] = pstrdup(keybuf.data);
+	}
+	pfree(keybuf.data);
+
+	MemoryContextSwitchTo(oldcxt);
+
+	/* If FORCE_ARRAY, send the opening bracket */
+	if (cstate->opts.force_array)
+	{
+		CopySendChar(cstate, '[');
+		CopySendTextLikeEndOfRow(cstate);
+	}
+}
+
+/*
+ * Implementation of the outfunc callback for json format.
+ *
+ * Instead of text output functions, we categorize each column's type for
+ * JSON serialization once so CopyToJsonOneRow can use datum_to_json_append
+ * directly.
+ */
+static void
+CopyToJsonOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo)
+{
+	int			attidx = finfo - cstate->out_functions;
+
+	json_categorize_type(atttypid, false,
+						 &cstate->json_categories[attidx],
+						 &cstate->json_outfuncoids[attidx]);
+}
+
 /* Implementation of per-row callback for json format */
 static void
 CopyToJsonOneRow(CopyToState cstate, TupleTableSlot *slot)
 {
-	Datum		rowdata;
+	bool		needsep = false;
 
-	/*
-	 * For query-based COPY, copy the query's TupleDesc attributes into the
-	 * slot's TupleDesc once.  BlessTupleDesc registers the RECORDOID
-	 * descriptor so that lookup_rowtype_tupdesc inside composite_to_json can
-	 * find it.
-	 */
-	if (!cstate->rel && !cstate->json_tupledesc_ready)
-	{
-		memcpy(TupleDescAttr(slot->tts_tupleDescriptor, 0),
-			   TupleDescAttr(cstate->queryDesc->tupDesc, 0),
-			   cstate->queryDesc->tupDesc->natts * sizeof(FormData_pg_attribute));
-
-		for (int i = 0; i < cstate->queryDesc->tupDesc->natts; i++)
-			populate_compact_attribute(slot->tts_tupleDescriptor, i);
-
-		BlessTupleDesc(slot->tts_tupleDescriptor);
-		cstate->json_tupledesc_ready = true;
-	}
-
-	rowdata = ExecFetchSlotHeapTupleDatum(slot);
 	resetStringInfo(&cstate->json_buf);
-	composite_to_json(rowdata, &cstate->json_buf, false);
+	appendStringInfoChar(&cstate->json_buf, '{');
+
+	foreach_int(attnum, cstate->attnumlist)
+	{
+		Datum		value = slot->tts_values[attnum - 1];
+		bool		isnull = slot->tts_isnull[attnum - 1];
+
+		if (needsep)
+			appendStringInfoChar(&cstate->json_buf, ',');
+		needsep = true;
+
+		/* Append pre-escaped "key": */
+		appendStringInfoString(&cstate->json_buf,
+							   cstate->json_col_keys[attnum - 1]);
+
+		datum_to_json_append(value, isnull, &cstate->json_buf,
+							 cstate->json_categories[attnum - 1],
+							 cstate->json_outfuncoids[attnum - 1]);
+	}
+
+	appendStringInfoChar(&cstate->json_buf, '}');
 
 	if (cstate->opts.force_array)
 	{
@@ -1004,12 +1047,6 @@ BeginCopyTo(ParseState *pstate,
 	/* Generate or convert list of attributes to process */
 	cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
 
-	/* JSON outputs whole rows; a column list doesn't make sense */
-	if (cstate->opts.format == COPY_FORMAT_JSON && attnamelist != NIL)
-		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("column selection is not supported in JSON mode")));
-
 	num_phys_attrs = tupDesc->natts;
 
 	/* Convert FORCE_QUOTE name list to per-column flags, check validity */
@@ -1204,6 +1241,12 @@ DoCopyTo(CopyToState cstate)
 
 	/* Get info about the columns we need to process. */
 	cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
+	if (cstate->opts.format == COPY_FORMAT_JSON)
+	{
+		/* JSON outfunc callback stores per-column type categorization here */
+		cstate->json_categories = palloc0(num_phys_attrs * sizeof(JsonTypeCategory));
+		cstate->json_outfuncoids = palloc0(num_phys_attrs * sizeof(Oid));
+	}
 	foreach(cur, cstate->attnumlist)
 	{
 		int			attnum = lfirst_int(cur);
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index f609d7b9417..de81160a831 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -771,6 +771,20 @@ datum_to_json(Datum val, JsonTypeCategory tcategory, Oid outfuncoid)
 	return PointerGetDatum(cstring_to_text_with_len(result.data, result.len));
 }
 
+/*
+ * Append JSON representation of a Datum to a StringInfo.
+ *
+ * tcategory and outfuncoid are from a previous call to json_categorize_type.
+ * If is_null is true, appends "null" regardless of tcategory/outfuncoid.
+ */
+void
+datum_to_json_append(Datum val, bool is_null, StringInfo result,
+					 JsonTypeCategory tcategory, Oid outfuncoid)
+{
+	datum_to_json_internal(val, is_null, result, tcategory, outfuncoid,
+						   false);
+}
+
 /*
  * json_agg transition function
  *
diff --git a/src/include/utils/jsonfuncs.h b/src/include/utils/jsonfuncs.h
index 636f0f55840..12a01451fbb 100644
--- a/src/include/utils/jsonfuncs.h
+++ b/src/include/utils/jsonfuncs.h
@@ -85,6 +85,8 @@ extern void json_categorize_type(Oid typoid, bool is_jsonb,
 								 JsonTypeCategory *tcategory, Oid *outfuncoid);
 extern Datum datum_to_json(Datum val, JsonTypeCategory tcategory,
 						   Oid outfuncoid);
+extern void datum_to_json_append(Datum val, bool is_null, StringInfo result,
+								 JsonTypeCategory tcategory, Oid outfuncoid);
 extern Datum datum_to_jsonb(Datum val, JsonTypeCategory tcategory,
 							Oid outfuncoid);
 extern Datum jsonb_from_text(text *js, bool unique_keys);
diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out
index a7e88b711d7..d60e5a4d32a 100644
--- a/src/test/regress/expected/copy.out
+++ b/src/test/regress/expected/copy.out
@@ -111,8 +111,6 @@ LINE 1: copy copytest to stdout (format json, on_error ignore);
                                               ^
 copy copytest from stdin(format json);
 ERROR:  COPY JSON mode cannot be used with COPY FROM
-copy copytest (style) to stdout (format json);
-ERROR:  column selection is not supported in JSON mode
 -- all of the above should yield error
 -- should fail: force_array requires json format
 copy copytest to stdout (format csv, force_array true);
@@ -137,6 +135,12 @@ copy copytest to stdout (format json, force_array false);
 {"style":"Unix","test":"abc\ndef","filler":2}
 {"style":"Mac","test":"abc\rdef","filler":3}
 {"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4}
+-- column list with json format
+copy copytest (style, filler) to stdout (format json);
+{"style":"DOS","filler":1}
+{"style":"Unix","filler":2}
+{"style":"Mac","filler":3}
+{"style":"esc\\ape","filler":4}
 -- embedded escaped characters
 create temp table copyjsontest (
     id bigserial,
diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql
index ae202fc5e8d..d64f4c66b93 100644
--- a/src/test/regress/sql/copy.sql
+++ b/src/test/regress/sql/copy.sql
@@ -99,7 +99,6 @@ copy copytest to stdout (format json, force_not_null *);
 copy copytest to stdout (format json, force_null *);
 copy copytest to stdout (format json, on_error ignore);
 copy copytest from stdin(format json);
-copy copytest (style) to stdout (format json);
 -- all of the above should yield error
 
 -- should fail: force_array requires json format
@@ -110,6 +109,9 @@ copy copytest to stdout (format json, force_array);
 copy copytest to stdout (format json, force_array true);
 copy copytest to stdout (format json, force_array false);
 
+-- column list with json format
+copy copytest (style, filler) to stdout (format json);
+
 -- embedded escaped characters
 create temp table copyjsontest (
     id bigserial,
-- 
2.43.0

