public inbox for [email protected]
help / color / mirror / Atom feedFrom: Joe Conway <[email protected]>
To: Davin Shearer <[email protected]>
To: PostgreSQL-development <[email protected]>
Subject: Re: Emitting JSON to file using COPY TO
Date: Fri, 1 Dec 2023 14:28:55 -0500
Message-ID: <[email protected]> (raw)
In-Reply-To: <CALvfUkCq3pvkmniWEKZwkbA6hNNvkANF3OamMfhC170RgY+HJg@mail.gmail.com>
References: <CALvfUkBxTYy5uWPFVwpk_7ii2zgT07t3d-yR_cy4sfrrLU=kcg@mail.gmail.com>
<[email protected]>
<CAFCRh-_LhP5Ln4-beVz=1pQ9VycU3hAxnJHXhPHZRmX0BSCVEQ@mail.gmail.com>
<CAFCRh--Vhx0105S7SWAYHGcVMTSRtbcoArDUGSbYSWRUGunW_g@mail.gmail.com>
<CAKFQuwZb3zaSqkOvGXRRLZEW61az+4xyHZ7tBPR91JfrcdCKvA@mail.gmail.com>
<[email protected]>
<CAFCRh-_GdiUvjd5z5FfvTfhruOnYqBu163XU47zZE8RNATCJGQ@mail.gmail.com>
<[email protected]>
<CALvfUkCq3pvkmniWEKZwkbA6hNNvkANF3OamMfhC170RgY+HJg@mail.gmail.com>
On 11/29/23 10:32, Davin Shearer wrote:
> Thanks for the responses everyone.
>
> I worked around the issue using the `psql -tc` method as Filip described.
>
> I think it would be great to support writing JSON using COPY TO at
> some point so I can emit JSON to files using a PostgreSQL function directly.
>
> -Davin
>
> On Tue, Nov 28, 2023 at 2:36 AM Filip Sedlák <[email protected]
> <mailto:[email protected]>> wrote:
>
> This would be a very special case for COPY. It applies only to a single
> column of JSON values. The original problem can be solved with psql
> --tuples-only as David wrote earlier.
>
>
> $ psql -tc 'select json_agg(row_to_json(t))
> from (select * from public.tbl_json_test) t;'
>
> [{"id":1,"t_test":"here's a \"string\""}]
>
>
> Special-casing any encoding/escaping scheme leads to bugs and harder
> parsing.
(moved to hackers)
I did a quick PoC patch (attached) -- if there interest and no hard
objections I would like to get it up to speed for the January commitfest.
Currently the patch lacks documentation and regression test support.
Questions:
----------
1. Is supporting JSON array format sufficient, or does it need to
support some other options? How flexible does the support scheme need to be?
2. This only supports COPY TO and we would undoubtedly want to support
COPY FROM for JSON as well, but is that required from the start?
Thanks for any feedback.
--
Joe Conway
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
Attachments:
[text/x-patch] copyto_json.000.diff (8.3K, 2-copyto_json.000.diff)
download | inline diff:
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index cfad47b..bc1f684 100644
*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
*************** ProcessCopyOptions(ParseState *pstate,
*** 443,448 ****
--- 443,450 ----
/* default format */ ;
else if (strcmp(fmt, "csv") == 0)
opts_out->csv_mode = true;
+ else if (strcmp(fmt, "json") == 0)
+ opts_out->json_mode = true;
else if (strcmp(fmt, "binary") == 0)
opts_out->binary = true;
else
*************** ProcessCopyOptions(ParseState *pstate,
*** 667,672 ****
--- 669,679 ----
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot specify HEADER in BINARY mode")));
+ if (opts_out->json_mode && opts_out->header_line)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot specify HEADER in JSON mode")));
+
/* Check quote */
if (!opts_out->csv_mode && opts_out->quote != NULL)
ereport(ERROR,
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index c66a047..f6ee771 100644
*** a/src/backend/commands/copyto.c
--- b/src/backend/commands/copyto.c
***************
*** 37,42 ****
--- 37,43 ----
#include "rewrite/rewriteHandler.h"
#include "storage/fd.h"
#include "tcop/tcopprot.h"
+ #include "utils/json.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/partcache.h"
*************** typedef struct
*** 112,117 ****
--- 113,120 ----
/* NOTE: there's a copy of this in copyfromparse.c */
static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
+ /* need delimiter to start next json array element */
+ static bool json_row_delim_needed = false;
/* non-export function prototypes */
static void EndCopy(CopyToState cstate);
*************** DoCopyTo(CopyToState cstate)
*** 845,850 ****
--- 848,861 ----
CopySendEndOfRow(cstate);
}
+
+ /* if a JSON has been requested send the opening bracket */
+ if (cstate->opts.json_mode)
+ {
+ CopySendChar(cstate, '[');
+ CopySendEndOfRow(cstate);
+ json_row_delim_needed = false;
+ }
}
if (cstate->rel)
*************** DoCopyTo(CopyToState cstate)
*** 892,897 ****
--- 903,915 ----
CopySendEndOfRow(cstate);
}
+ /* if a JSON has been requested send the closing bracket */
+ if (cstate->opts.json_mode)
+ {
+ CopySendChar(cstate, ']');
+ CopySendEndOfRow(cstate);
+ }
+
MemoryContextDelete(cstate->rowcontext);
if (fe_copy)
*************** DoCopyTo(CopyToState cstate)
*** 906,916 ****
static void
CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
{
- bool need_delim = false;
- FmgrInfo *out_functions = cstate->out_functions;
MemoryContext oldcontext;
- ListCell *cur;
- char *string;
MemoryContextReset(cstate->rowcontext);
oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
--- 924,930 ----
*************** CopyOneRowTo(CopyToState cstate, TupleTa
*** 921,974 ****
CopySendInt16(cstate, list_length(cstate->attnumlist));
}
! /* Make sure the tuple is fully deconstructed */
! slot_getallattrs(slot);
!
! foreach(cur, cstate->attnumlist)
{
! int attnum = lfirst_int(cur);
! Datum value = slot->tts_values[attnum - 1];
! bool isnull = slot->tts_isnull[attnum - 1];
! if (!cstate->opts.binary)
! {
! if (need_delim)
! CopySendChar(cstate, cstate->opts.delim[0]);
! need_delim = true;
! }
! if (isnull)
! {
! if (!cstate->opts.binary)
! CopySendString(cstate, cstate->opts.null_print_client);
! else
! CopySendInt32(cstate, -1);
! }
! else
{
if (!cstate->opts.binary)
{
! string = OutputFunctionCall(&out_functions[attnum - 1],
! value);
! if (cstate->opts.csv_mode)
! CopyAttributeOutCSV(cstate, string,
! cstate->opts.force_quote_flags[attnum - 1],
! list_length(cstate->attnumlist) == 1);
else
! CopyAttributeOutText(cstate, string);
}
else
{
! bytea *outputbytes;
! outputbytes = SendFunctionCall(&out_functions[attnum - 1],
! value);
! CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
! CopySendData(cstate, VARDATA(outputbytes),
! VARSIZE(outputbytes) - VARHDRSZ);
}
}
}
CopySendEndOfRow(cstate);
--- 935,1015 ----
CopySendInt16(cstate, list_length(cstate->attnumlist));
}
! if (!cstate->opts.json_mode)
{
! bool need_delim = false;
! FmgrInfo *out_functions = cstate->out_functions;
! ListCell *cur;
! char *string;
! /* Make sure the tuple is fully deconstructed */
! slot_getallattrs(slot);
! foreach(cur, cstate->attnumlist)
{
+ int attnum = lfirst_int(cur);
+ Datum value = slot->tts_values[attnum - 1];
+ bool isnull = slot->tts_isnull[attnum - 1];
+
if (!cstate->opts.binary)
{
! if (need_delim)
! CopySendChar(cstate, cstate->opts.delim[0]);
! need_delim = true;
! }
!
! if (isnull)
! {
! if (!cstate->opts.binary)
! CopySendString(cstate, cstate->opts.null_print_client);
else
! CopySendInt32(cstate, -1);
}
else
{
! if (!cstate->opts.binary)
! {
! string = OutputFunctionCall(&out_functions[attnum - 1],
! value);
! if (cstate->opts.csv_mode)
! CopyAttributeOutCSV(cstate, string,
! cstate->opts.force_quote_flags[attnum - 1],
! list_length(cstate->attnumlist) == 1);
! else
! CopyAttributeOutText(cstate, string);
! }
! else
! {
! bytea *outputbytes;
! outputbytes = SendFunctionCall(&out_functions[attnum - 1],
! value);
! CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
! CopySendData(cstate, VARDATA(outputbytes),
! VARSIZE(outputbytes) - VARHDRSZ);
! }
}
}
}
+ else
+ {
+ Datum rowdata = ExecFetchSlotHeapTupleDatum(slot);
+ StringInfo result;
+
+ result = makeStringInfo();
+ composite_to_json(rowdata, result, false);
+
+ if (json_row_delim_needed)
+ CopySendChar(cstate, ',');
+ else
+ {
+ /* first row needs no delimiter */
+ CopySendChar(cstate, ' ');
+ json_row_delim_needed = true;
+ }
+
+ CopyAttributeOutText(cstate, result->data);
+ }
CopySendEndOfRow(cstate);
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 71ae53f..cb4311e 100644
*** a/src/backend/utils/adt/json.c
--- b/src/backend/utils/adt/json.c
*************** typedef struct JsonAggState
*** 83,90 ****
JsonUniqueBuilderState unique_check;
} JsonAggState;
- static void composite_to_json(Datum composite, StringInfo result,
- bool use_line_feeds);
static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
Datum *vals, bool *nulls, int *valcount,
JsonTypeCategory tcategory, Oid outfuncoid,
--- 83,88 ----
*************** array_to_json_internal(Datum array, Stri
*** 490,497 ****
/*
* Turn a composite / record into JSON.
*/
! static void
composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
{
HeapTupleHeader td;
--- 488,496 ----
/*
* Turn a composite / record into JSON.
+ * Exported so COPY TO can use it.
*/
! void
composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
{
HeapTupleHeader td;
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index f2cca0b..e66bd01 100644
*** a/src/include/commands/copy.h
--- b/src/include/commands/copy.h
*************** typedef struct CopyFormatOptions
*** 43,48 ****
--- 43,49 ----
bool binary; /* binary format? */
bool freeze; /* freeze rows on loading? */
bool csv_mode; /* Comma Separated Value format? */
+ bool json_mode; /* JSON format? */
CopyHeaderChoice header_line; /* header line? */
char *null_print; /* NULL marker string (server encoding!) */
int null_print_len; /* length of same */
diff --git a/src/include/utils/json.h b/src/include/utils/json.h
index f07e82c..badc5a6 100644
*** a/src/include/utils/json.h
--- b/src/include/utils/json.h
***************
*** 17,22 ****
--- 17,24 ----
#include "lib/stringinfo.h"
/* functions in json.c */
+ extern void composite_to_json(Datum composite, StringInfo result,
+ bool use_line_feeds);
extern void escape_json(StringInfo buf, const char *str);
extern char *JsonEncodeDateTime(char *buf, Datum value, Oid typid,
const int *tzp);
view thread (35+ messages) latest in thread
reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Reply to all the recipients using the --to and --cc options:
reply via email
To: [email protected]
Cc: [email protected], [email protected]
Subject: Re: Emitting JSON to file using COPY TO
In-Reply-To: <[email protected]>
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox