public inbox for [email protected]  
help / color / mirror / Atom feed
From: Joe Conway <[email protected]>
To: Davin Shearer <[email protected]>
To: PostgreSQL-development <[email protected]>
Subject: Re: Emitting JSON to file using COPY TO
Date: Fri, 1 Dec 2023 14:28:55 -0500
Message-ID: <[email protected]> (raw)
In-Reply-To: <CALvfUkCq3pvkmniWEKZwkbA6hNNvkANF3OamMfhC170RgY+HJg@mail.gmail.com>
References: <CALvfUkBxTYy5uWPFVwpk_7ii2zgT07t3d-yR_cy4sfrrLU=kcg@mail.gmail.com>
	<[email protected]>
	<CAFCRh-_LhP5Ln4-beVz=1pQ9VycU3hAxnJHXhPHZRmX0BSCVEQ@mail.gmail.com>
	<CAFCRh--Vhx0105S7SWAYHGcVMTSRtbcoArDUGSbYSWRUGunW_g@mail.gmail.com>
	<CAKFQuwZb3zaSqkOvGXRRLZEW61az+4xyHZ7tBPR91JfrcdCKvA@mail.gmail.com>
	<[email protected]>
	<CAFCRh-_GdiUvjd5z5FfvTfhruOnYqBu163XU47zZE8RNATCJGQ@mail.gmail.com>
	<[email protected]>
	<CALvfUkCq3pvkmniWEKZwkbA6hNNvkANF3OamMfhC170RgY+HJg@mail.gmail.com>

On 11/29/23 10:32, Davin Shearer wrote:
> Thanks for the responses everyone.
> 
> I worked around the issue using the `psql -tc` method as Filip described.
> 
> I think it would be great to support writing JSON using COPY TO at 
> some point so I can emit JSON to files using a PostgreSQL function directly.
> 
> -Davin
> 
> On Tue, Nov 28, 2023 at 2:36 AM Filip Sedlák <[email protected] 
> <mailto:[email protected]>> wrote:
> 
>     This would be a very special case for COPY. It applies only to a single
>     column of JSON values. The original problem can be solved with psql
>     --tuples-only as David wrote earlier.
> 
> 
>     $ psql -tc 'select json_agg(row_to_json(t))
>                    from (select * from public.tbl_json_test) t;'
> 
>        [{"id":1,"t_test":"here's a \"string\""}]
> 
> 
>     Special-casing any encoding/escaping scheme leads to bugs and harder
>     parsing.

(moved to hackers)

I did a quick PoC patch (attached) -- if there interest and no hard 
objections I would like to get it up to speed for the January commitfest.

Currently the patch lacks documentation and regression test support.

Questions:
----------
1. Is supporting JSON array format sufficient, or does it need to 
support some other options? How flexible does the support scheme need to be?

2. This only supports COPY TO and we would undoubtedly want to support 
COPY FROM for JSON as well, but is that required from the start?

Thanks for any feedback.

-- 
Joe Conway
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com


Attachments:

  [text/x-patch] copyto_json.000.diff (8.3K, 2-copyto_json.000.diff)
  download | inline diff:
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index cfad47b..bc1f684 100644
*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
*************** ProcessCopyOptions(ParseState *pstate,
*** 443,448 ****
--- 443,450 ----
  				 /* default format */ ;
  			else if (strcmp(fmt, "csv") == 0)
  				opts_out->csv_mode = true;
+ 			else if (strcmp(fmt, "json") == 0)
+ 				opts_out->json_mode = true;
  			else if (strcmp(fmt, "binary") == 0)
  				opts_out->binary = true;
  			else
*************** ProcessCopyOptions(ParseState *pstate,
*** 667,672 ****
--- 669,679 ----
  				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  				 errmsg("cannot specify HEADER in BINARY mode")));
  
+ 	if (opts_out->json_mode && opts_out->header_line)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 				 errmsg("cannot specify HEADER in JSON mode")));
+ 
  	/* Check quote */
  	if (!opts_out->csv_mode && opts_out->quote != NULL)
  		ereport(ERROR,
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index c66a047..f6ee771 100644
*** a/src/backend/commands/copyto.c
--- b/src/backend/commands/copyto.c
***************
*** 37,42 ****
--- 37,43 ----
  #include "rewrite/rewriteHandler.h"
  #include "storage/fd.h"
  #include "tcop/tcopprot.h"
+ #include "utils/json.h"
  #include "utils/lsyscache.h"
  #include "utils/memutils.h"
  #include "utils/partcache.h"
*************** typedef struct
*** 112,117 ****
--- 113,120 ----
  /* NOTE: there's a copy of this in copyfromparse.c */
  static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
  
+ /* need delimiter to start next json array element */
+ static bool json_row_delim_needed = false;
  
  /* non-export function prototypes */
  static void EndCopy(CopyToState cstate);
*************** DoCopyTo(CopyToState cstate)
*** 845,850 ****
--- 848,861 ----
  
  			CopySendEndOfRow(cstate);
  		}
+ 
+ 		/* if a JSON has been requested send the opening bracket */
+ 		if (cstate->opts.json_mode)
+ 		{
+ 			CopySendChar(cstate, '[');
+ 			CopySendEndOfRow(cstate);
+ 			json_row_delim_needed = false;
+ 		}
  	}
  
  	if (cstate->rel)
*************** DoCopyTo(CopyToState cstate)
*** 892,897 ****
--- 903,915 ----
  		CopySendEndOfRow(cstate);
  	}
  
+ 	/* if a JSON has been requested send the closing bracket */
+ 	if (cstate->opts.json_mode)
+ 	{
+ 		CopySendChar(cstate, ']');
+ 		CopySendEndOfRow(cstate);
+ 	}
+ 
  	MemoryContextDelete(cstate->rowcontext);
  
  	if (fe_copy)
*************** DoCopyTo(CopyToState cstate)
*** 906,916 ****
  static void
  CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
  {
- 	bool		need_delim = false;
- 	FmgrInfo   *out_functions = cstate->out_functions;
  	MemoryContext oldcontext;
- 	ListCell   *cur;
- 	char	   *string;
  
  	MemoryContextReset(cstate->rowcontext);
  	oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
--- 924,930 ----
*************** CopyOneRowTo(CopyToState cstate, TupleTa
*** 921,974 ****
  		CopySendInt16(cstate, list_length(cstate->attnumlist));
  	}
  
! 	/* Make sure the tuple is fully deconstructed */
! 	slot_getallattrs(slot);
! 
! 	foreach(cur, cstate->attnumlist)
  	{
! 		int			attnum = lfirst_int(cur);
! 		Datum		value = slot->tts_values[attnum - 1];
! 		bool		isnull = slot->tts_isnull[attnum - 1];
  
! 		if (!cstate->opts.binary)
! 		{
! 			if (need_delim)
! 				CopySendChar(cstate, cstate->opts.delim[0]);
! 			need_delim = true;
! 		}
  
! 		if (isnull)
! 		{
! 			if (!cstate->opts.binary)
! 				CopySendString(cstate, cstate->opts.null_print_client);
! 			else
! 				CopySendInt32(cstate, -1);
! 		}
! 		else
  		{
  			if (!cstate->opts.binary)
  			{
! 				string = OutputFunctionCall(&out_functions[attnum - 1],
! 											value);
! 				if (cstate->opts.csv_mode)
! 					CopyAttributeOutCSV(cstate, string,
! 										cstate->opts.force_quote_flags[attnum - 1],
! 										list_length(cstate->attnumlist) == 1);
  				else
! 					CopyAttributeOutText(cstate, string);
  			}
  			else
  			{
! 				bytea	   *outputbytes;
  
! 				outputbytes = SendFunctionCall(&out_functions[attnum - 1],
! 											   value);
! 				CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
! 				CopySendData(cstate, VARDATA(outputbytes),
! 							 VARSIZE(outputbytes) - VARHDRSZ);
  			}
  		}
  	}
  
  	CopySendEndOfRow(cstate);
  
--- 935,1015 ----
  		CopySendInt16(cstate, list_length(cstate->attnumlist));
  	}
  
! 	if (!cstate->opts.json_mode)
  	{
! 		bool		need_delim = false;
! 		FmgrInfo   *out_functions = cstate->out_functions;
! 		ListCell   *cur;
! 		char	   *string;
  
! 		/* Make sure the tuple is fully deconstructed */
! 		slot_getallattrs(slot);
  
! 		foreach(cur, cstate->attnumlist)
  		{
+ 			int			attnum = lfirst_int(cur);
+ 			Datum		value = slot->tts_values[attnum - 1];
+ 			bool		isnull = slot->tts_isnull[attnum - 1];
+ 
  			if (!cstate->opts.binary)
  			{
! 				if (need_delim)
! 					CopySendChar(cstate, cstate->opts.delim[0]);
! 				need_delim = true;
! 			}
! 
! 			if (isnull)
! 			{
! 				if (!cstate->opts.binary)
! 					CopySendString(cstate, cstate->opts.null_print_client);
  				else
! 					CopySendInt32(cstate, -1);
  			}
  			else
  			{
! 				if (!cstate->opts.binary)
! 				{
! 					string = OutputFunctionCall(&out_functions[attnum - 1],
! 												value);
! 					if (cstate->opts.csv_mode)
! 						CopyAttributeOutCSV(cstate, string,
! 											cstate->opts.force_quote_flags[attnum - 1],
! 											list_length(cstate->attnumlist) == 1);
! 					else
! 						CopyAttributeOutText(cstate, string);
! 				}
! 				else
! 				{
! 					bytea	   *outputbytes;
  
! 					outputbytes = SendFunctionCall(&out_functions[attnum - 1],
! 												   value);
! 					CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
! 					CopySendData(cstate, VARDATA(outputbytes),
! 								 VARSIZE(outputbytes) - VARHDRSZ);
! 				}
  			}
  		}
  	}
+ 	else
+ 	{
+ 		Datum	rowdata = ExecFetchSlotHeapTupleDatum(slot);
+ 		StringInfo	result;
+ 
+ 		result = makeStringInfo();
+ 		composite_to_json(rowdata, result, false);
+ 
+ 		if (json_row_delim_needed)
+ 			CopySendChar(cstate, ',');
+ 		else
+ 		{
+ 			/* first row needs no delimiter */
+ 			CopySendChar(cstate, ' ');
+ 			json_row_delim_needed = true;
+ 		}
+ 
+ 		CopyAttributeOutText(cstate, result->data);
+ 	}
  
  	CopySendEndOfRow(cstate);
  
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 71ae53f..cb4311e 100644
*** a/src/backend/utils/adt/json.c
--- b/src/backend/utils/adt/json.c
*************** typedef struct JsonAggState
*** 83,90 ****
  	JsonUniqueBuilderState unique_check;
  } JsonAggState;
  
- static void composite_to_json(Datum composite, StringInfo result,
- 							  bool use_line_feeds);
  static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
  							  Datum *vals, bool *nulls, int *valcount,
  							  JsonTypeCategory tcategory, Oid outfuncoid,
--- 83,88 ----
*************** array_to_json_internal(Datum array, Stri
*** 490,497 ****
  
  /*
   * Turn a composite / record into JSON.
   */
! static void
  composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
  {
  	HeapTupleHeader td;
--- 488,496 ----
  
  /*
   * Turn a composite / record into JSON.
+  * Exported so COPY TO can use it.
   */
! void
  composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
  {
  	HeapTupleHeader td;
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index f2cca0b..e66bd01 100644
*** a/src/include/commands/copy.h
--- b/src/include/commands/copy.h
*************** typedef struct CopyFormatOptions
*** 43,48 ****
--- 43,49 ----
  	bool		binary;			/* binary format? */
  	bool		freeze;			/* freeze rows on loading? */
  	bool		csv_mode;		/* Comma Separated Value format? */
+ 	bool		json_mode;		/* JSON format? */
  	CopyHeaderChoice header_line;	/* header line? */
  	char	   *null_print;		/* NULL marker string (server encoding!) */
  	int			null_print_len; /* length of same */
diff --git a/src/include/utils/json.h b/src/include/utils/json.h
index f07e82c..badc5a6 100644
*** a/src/include/utils/json.h
--- b/src/include/utils/json.h
***************
*** 17,22 ****
--- 17,24 ----
  #include "lib/stringinfo.h"
  
  /* functions in json.c */
+ extern void composite_to_json(Datum composite, StringInfo result,
+ 							  bool use_line_feeds);
  extern void escape_json(StringInfo buf, const char *str);
  extern char *JsonEncodeDateTime(char *buf, Datum value, Oid typid,
  								const int *tzp);


view thread (35+ messages)  latest in thread

reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected], [email protected]
  Subject: Re: Emitting JSON to file using COPY TO
  In-Reply-To: <[email protected]>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox