From 560481ec3408bf98ccf696ffb99b6c8ed1eeb6be Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Tue, 31 Dec 2024 15:05:39 -0800 Subject: [PATCH v5 2/4] Allow using jumbling logic outside of query jumble unit file This can be useful either for jumbling expressions in other contexts (e.g. to calculate a plan jumble), or to allow extensions to use a modified jumbling logic more easily. This intentionally supports the use case where a separate jumbling logic does not care about recording constants, as the query jumble does. --- .../pg_stat_statements/pg_stat_statements.c | 2 +- src/backend/commands/createas.c | 2 +- src/backend/commands/extension.c | 2 +- src/backend/commands/portalcmds.c | 2 +- src/backend/executor/execMain.c | 2 +- src/backend/nodes/Makefile | 6 +- src/backend/nodes/README | 4 +- src/backend/nodes/gen_node_support.pl | 14 +-- .../{queryjumblefuncs.c => jumblefuncs.c} | 109 ++++++++++-------- src/backend/nodes/meson.build | 2 +- src/backend/parser/analyze.c | 2 +- src/backend/postmaster/launch_backend.c | 2 +- src/backend/utils/misc/guc_tables.c | 2 +- src/include/nodes/{queryjumble.h => jumble.h} | 18 ++- src/include/nodes/meson.build | 2 +- src/include/nodes/nodes.h | 2 +- src/include/parser/analyze.h | 2 +- 17 files changed, 98 insertions(+), 77 deletions(-) rename src/backend/nodes/{queryjumblefuncs.c => jumblefuncs.c} (78%) rename src/include/nodes/{queryjumble.h => jumble.h} (80%) diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index bebf8134eb..26ef7f3e03 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -55,7 +55,7 @@ #include "jit/jit.h" #include "mb/pg_wchar.h" #include "miscadmin.h" -#include "nodes/queryjumble.h" +#include "nodes/jumble.h" #include "optimizer/planner.h" #include "parser/analyze.h" #include "parser/scanner.h" diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 
23cecd99c9..a8498e370c 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -37,7 +37,7 @@ #include "commands/view.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" -#include "nodes/queryjumble.h" +#include "nodes/jumble.h" #include "parser/analyze.h" #include "rewrite/rewriteHandler.h" #include "tcop/tcopprot.h" diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c index ba540e3de5..3a462d708b 100644 --- a/src/backend/commands/extension.c +++ b/src/backend/commands/extension.c @@ -54,7 +54,7 @@ #include "funcapi.h" #include "mb/pg_wchar.h" #include "miscadmin.h" -#include "nodes/queryjumble.h" +#include "nodes/jumble.h" #include "storage/fd.h" #include "tcop/utility.h" #include "utils/acl.h" diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c index e7c8171c10..926ec2af36 100644 --- a/src/backend/commands/portalcmds.c +++ b/src/backend/commands/portalcmds.c @@ -28,7 +28,7 @@ #include "executor/executor.h" #include "executor/tstoreReceiver.h" #include "miscadmin.h" -#include "nodes/queryjumble.h" +#include "nodes/jumble.h" #include "parser/analyze.h" #include "rewrite/rewriteHandler.h" #include "tcop/pquery.h" diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 604cb0625b..fa6002d5f9 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -51,7 +51,7 @@ #include "foreign/fdwapi.h" #include "mb/pg_wchar.h" #include "miscadmin.h" -#include "nodes/queryjumble.h" +#include "nodes/jumble.h" #include "parser/parse_relation.h" #include "pgstat.h" #include "rewrite/rewriteHandler.h" diff --git a/src/backend/nodes/Makefile b/src/backend/nodes/Makefile index 77ddb9ca53..4545649e2d 100644 --- a/src/backend/nodes/Makefile +++ b/src/backend/nodes/Makefile @@ -26,7 +26,7 @@ OBJS = \ outfuncs.o \ params.o \ print.o \ - queryjumblefuncs.o \ + jumblefuncs.o \ read.o \ readfuncs.o \ tidbitmap.o \ @@ -91,8 +91,8 @@ 
$(top_builddir)/src/include/nodes/header-stamp: node-support-stamp copyfuncs.o: copyfuncs.c copyfuncs.funcs.c copyfuncs.switch.c | node-support-stamp equalfuncs.o: equalfuncs.c equalfuncs.funcs.c equalfuncs.switch.c | node-support-stamp outfuncs.o: outfuncs.c outfuncs.funcs.c outfuncs.switch.c | node-support-stamp -queryjumblefuncs.o: queryjumblefuncs.c queryjumblefuncs.funcs.c queryjumblefuncs.switch.c | node-support-stamp +jumblefuncs.o: jumblefuncs.c jumblefuncs.funcs.c jumblefuncs.switch.c | node-support-stamp readfuncs.o: readfuncs.c readfuncs.funcs.c readfuncs.switch.c | node-support-stamp clean: - rm -f node-support-stamp $(addsuffix funcs.funcs.c,copy equal out queryjumble read) $(addsuffix funcs.switch.c,copy equal out queryjumble read) nodetags.h + rm -f node-support-stamp $(addsuffix funcs.funcs.c,copy equal out jumble read) $(addsuffix funcs.switch.c,copy equal out jumble read) nodetags.h diff --git a/src/backend/nodes/README b/src/backend/nodes/README index f8bbd60538..a43290fbab 100644 --- a/src/backend/nodes/README +++ b/src/backend/nodes/README @@ -51,7 +51,7 @@ FILES IN THIS DIRECTORY (src/backend/nodes/) readfuncs.c - convert text representation back to a node tree (*) makefuncs.c - creator functions for some common node types nodeFuncs.c - some other general-purpose manipulation functions - queryjumblefuncs.c - compute a node tree for query jumbling (*) + jumblefuncs.c - compute a node tree for jumbling (*) (*) - Most functions in these files are generated by gen_node_support.pl and #include'd there. @@ -88,7 +88,7 @@ Suppose you want to define a node Foo: If you intend to inherit from, say a Plan node, put Plan as the first field of your struct definition. (The T_Foo tag is created automatically.) 2. Check that the generated support functions in copyfuncs.funcs.c, - equalfuncs.funcs.c, outfuncs.funcs.c, queryjumblefuncs.funcs.c and + equalfuncs.funcs.c, outfuncs.funcs.c, jumblefuncs.funcs.c and readfuncs.funcs.c look correct. 
Add attributes as necessary to control the outcome. (For some classes of node types, you don't need all the support functions. Use node attributes similar to those of related node types.) diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl index 1a657f7e0a..26ec7e0d59 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -1244,15 +1244,15 @@ close $ofs; close $rfs; -# queryjumblefuncs.c +# jumblefuncs.c -push @output_files, 'queryjumblefuncs.funcs.c'; -open my $jff, '>', "$output_path/queryjumblefuncs.funcs.c$tmpext" or die $!; -push @output_files, 'queryjumblefuncs.switch.c'; -open my $jfs, '>', "$output_path/queryjumblefuncs.switch.c$tmpext" or die $!; +push @output_files, 'jumblefuncs.funcs.c'; +open my $jff, '>', "$output_path/jumblefuncs.funcs.c$tmpext" or die $!; +push @output_files, 'jumblefuncs.switch.c'; +open my $jfs, '>', "$output_path/jumblefuncs.switch.c$tmpext" or die $!; -printf $jff $header_comment, 'queryjumblefuncs.funcs.c'; -printf $jfs $header_comment, 'queryjumblefuncs.switch.c'; +printf $jff $header_comment, 'jumblefuncs.funcs.c'; +printf $jfs $header_comment, 'jumblefuncs.switch.c'; print $jff $node_includes; diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/jumblefuncs.c similarity index 78% rename from src/backend/nodes/queryjumblefuncs.c rename to src/backend/nodes/jumblefuncs.c index b103a28193..fcf34ae479 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/jumblefuncs.c @@ -1,32 +1,36 @@ /*------------------------------------------------------------------------- * - * queryjumblefuncs.c - * Query normalization and fingerprinting. + * jumblefuncs.c + * Fingerprinting and jumbling. * - * Normalization is a process whereby similar queries, typically differing only - * in their constants (though the exact rules are somewhat more subtle than - * that) are recognized as equivalent, and are tracked as a single entry. 
This - * is particularly useful for non-prepared queries. + * Fingerprinting selectively serializes key fields within a tree structure, + * such as a Query or Plan tree, to create a unique identifier while ignoring + * extraneous details. These essential fields are concatenated into a jumble, + * from which a 64-bit hash is computed. Unlike regular serialization, this + * approach excludes irrelevant information. * - * Normalization is implemented by fingerprinting queries, selectively - * serializing those fields of each query tree's nodes that are judged to be - * essential to the query. This is referred to as a query jumble. This is - * distinct from a regular serialization in that various extraneous - * information is ignored as irrelevant or not essential to the query, such - * as the collations of Vars and, most notably, the values of constants. + * Use Cases: * - * This jumble is acquired at the end of parse analysis of each query, and - * a 64-bit hash of it is stored into the query's Query.queryId field. - * The server then copies this value around, making it available in plan - * tree(s) generated from the query. The executor can then use this value - * to blame query costs on the proper queryId. + * 1. In-Core Query Normalization & Identification * - * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California + * Fingerprinting is used to normalize query trees by generating a hash stored + * in the Query.queryId field. This ID is propagated to plan tree(s), allowing + * the executor to attribute query costs to the proper queryId. The process + * excludes information like typmod, collation, and most notably, the values + * of constants. * + * Example: The following queries produce the same queryId: + * + * SELECT t.* FROM s1.t WHERE c1 = 1; + * SELECT t.* FROM s1.t WHERE c1 = 2; + * + * 2. 
Modified jumbling logic for extensions + * + * Extensions can modify the fingerprinting logic for queryId, or fingerprint + * other types of trees, such as a plan tree, to compute a plan identifier. * * IDENTIFICATION - * src/backend/nodes/queryjumblefuncs.c + * src/backend/nodes/jumblefuncs.c * *------------------------------------------------------------------------- */ @@ -34,7 +38,7 @@ #include "common/hashfn.h" #include "miscadmin.h" -#include "nodes/queryjumble.h" +#include "nodes/jumble.h" #include "parser/scansup.h" #define JUMBLE_SIZE 1024 /* query serialization buffer size */ @@ -51,10 +55,7 @@ int compute_query_id = COMPUTE_QUERY_ID_AUTO; */ bool query_id_enabled = false; -static void AppendJumble(JumbleState *jstate, - const unsigned char *item, Size size); static void RecordConstLocation(JumbleState *jstate, int location); -static void _jumbleNode(JumbleState *jstate, Node *node); static void _jumbleA_Const(JumbleState *jstate, Node *node); static void _jumbleList(JumbleState *jstate, Node *node); static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node); @@ -109,28 +110,42 @@ CleanQuerytext(const char *query, int *location, int *len) } JumbleState * -JumbleQuery(Query *query) +InitializeJumbleState(bool record_clocations) { - JumbleState *jstate = NULL; - - Assert(IsQueryIdEnabled()); - - jstate = (JumbleState *) palloc(sizeof(JumbleState)); + JumbleState *jstate = (JumbleState *) palloc0(sizeof(JumbleState)); /* Set up workspace for query jumbling */ jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE); jstate->jumble_len = 0; - jstate->clocations_buf_size = 32; - jstate->clocations = (LocationLen *) - palloc(jstate->clocations_buf_size * sizeof(LocationLen)); - jstate->clocations_count = 0; - jstate->highest_extern_param_id = 0; + + if (record_clocations) + { + jstate->clocations_buf_size = 32; + jstate->clocations = (LocationLen *) + palloc(jstate->clocations_buf_size * sizeof(LocationLen)); + } + + return jstate; +} + +uint64 
+HashJumbleState(JumbleState *jstate) +{ + return DatumGetUInt64(hash_any_extended(jstate->jumble, + jstate->jumble_len, + 0)); +} + +JumbleState * +JumbleQuery(Query *query) +{ + JumbleState *jstate = InitializeJumbleState(true); + + Assert(IsQueryIdEnabled()); /* Compute query ID and mark the Query node with it */ - _jumbleNode(jstate, (Node *) query); - query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble, - jstate->jumble_len, - 0)); + JumbleNode(jstate, (Node *) query); + query->queryId = HashJumbleState(jstate); /* * If we are unlucky enough to get a hash of zero, use 1 instead for @@ -164,7 +179,7 @@ EnableQueryId(void) * AppendJumble: Append a value that is substantive in a given query to * the current jumble. */ -static void +void AppendJumble(JumbleState *jstate, const unsigned char *item, Size size) { unsigned char *jumble = jstate->jumble; @@ -205,7 +220,7 @@ static void RecordConstLocation(JumbleState *jstate, int location) { /* -1 indicates unknown or undefined location */ - if (location >= 0) + if (location >= 0 && jstate->clocations_buf_size > 0) { /* enlarge array if needed */ if (jstate->clocations_count >= jstate->clocations_buf_size) @@ -224,7 +239,7 @@ RecordConstLocation(JumbleState *jstate, int location) } #define JUMBLE_NODE(item) \ - _jumbleNode(jstate, (Node *) expr->item) + JumbleNode(jstate, (Node *) expr->item) #define JUMBLE_LOCATION(location) \ RecordConstLocation(jstate, expr->location) #define JUMBLE_FIELD(item) \ @@ -237,10 +252,10 @@ do { \ AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \ } while(0) -#include "queryjumblefuncs.funcs.c" +#include "jumblefuncs.funcs.c" -static void -_jumbleNode(JumbleState *jstate, Node *node) +void +JumbleNode(JumbleState *jstate, Node *node) { Node *expr = node; @@ -258,7 +273,7 @@ _jumbleNode(JumbleState *jstate, Node *node) switch (nodeTag(expr)) { -#include "queryjumblefuncs.switch.c" +#include "jumblefuncs.switch.c" case T_List: case T_IntList: @@ 
-305,7 +320,7 @@ _jumbleList(JumbleState *jstate, Node *node) { case T_List: foreach(l, expr) - _jumbleNode(jstate, lfirst(l)); + JumbleNode(jstate, lfirst(l)); break; case T_IntList: foreach(l, expr) diff --git a/src/backend/nodes/meson.build b/src/backend/nodes/meson.build index 9a1c1b7b98..b7ebb86bab 100644 --- a/src/backend/nodes/meson.build +++ b/src/backend/nodes/meson.build @@ -19,7 +19,7 @@ backend_sources += files( nodefunc_sources = files( 'copyfuncs.c', 'equalfuncs.c', - 'queryjumblefuncs.c', + 'jumblefuncs.c', 'outfuncs.c', 'readfuncs.c', ) diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 76f58b3aca..cd235921b4 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -31,7 +31,7 @@ #include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" -#include "nodes/queryjumble.h" +#include "nodes/jumble.h" #include "optimizer/optimizer.h" #include "parser/analyze.h" #include "parser/parse_agg.h" diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c index a97a1eda6d..b127b1bef3 100644 --- a/src/backend/postmaster/launch_backend.c +++ b/src/backend/postmaster/launch_backend.c @@ -53,7 +53,7 @@ #include "utils/memutils.h" #ifdef EXEC_BACKEND -#include "nodes/queryjumble.h" +#include "nodes/jumble.h" #include "storage/pg_shmem.h" #include "storage/spin.h" #endif diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index ce7534d4d2..9a22512fef 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -50,7 +50,7 @@ #include "libpq/auth.h" #include "libpq/libpq.h" #include "libpq/scram.h" -#include "nodes/queryjumble.h" +#include "nodes/jumble.h" #include "optimizer/cost.h" #include "optimizer/geqo.h" #include "optimizer/optimizer.h" diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/jumble.h similarity index 80% rename from src/include/nodes/queryjumble.h rename to 
src/include/nodes/jumble.h index 50eb956658..7587c9f708 100644 --- a/src/include/nodes/queryjumble.h +++ b/src/include/nodes/jumble.h @@ -1,18 +1,18 @@ /*------------------------------------------------------------------------- * - * queryjumble.h - * Query normalization and fingerprinting. + * jumble.h + * Fingerprinting and jumbling. * * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * src/include/nodes/queryjumble.h + * src/include/nodes/jumble.h * *------------------------------------------------------------------------- */ -#ifndef QUERYJUMBLE_H -#define QUERYJUMBLE_H +#ifndef JUMBLE_H +#define JUMBLE_H #include "nodes/parsenodes.h" @@ -83,4 +83,10 @@ IsQueryIdEnabled(void) return query_id_enabled; } -#endif /* QUERYJUMBLE_H */ +/* Functions called for plan jumbling or extensions doing their own jumbling */ +extern JumbleState *InitializeJumbleState(bool record_clocations); +extern void AppendJumble(JumbleState *jstate, const unsigned char *item, Size size); +extern void JumbleNode(JumbleState *jstate, Node *node); +extern uint64 HashJumbleState(JumbleState *jstate); + +#endif /* JUMBLE_H */ diff --git a/src/include/nodes/meson.build b/src/include/nodes/meson.build index d1ca24dd32..5c0ee5b0f6 100644 --- a/src/include/nodes/meson.build +++ b/src/include/nodes/meson.build @@ -37,7 +37,7 @@ node_support_output = [ 'readfuncs.funcs.c', 'readfuncs.switch.c', 'copyfuncs.funcs.c', 'copyfuncs.switch.c', 'equalfuncs.funcs.c', 'equalfuncs.switch.c', - 'queryjumblefuncs.funcs.c', 'queryjumblefuncs.switch.c', + 'jumblefuncs.funcs.c', 'jumblefuncs.switch.c', ] node_support_install = [ dir_include_server / 'nodes', diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 580238bfab..7330d5ffc4 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -53,7 +53,7 @@ typedef enum NodeTag * - custom_read_write: Has custom 
implementations in outfuncs.c and * readfuncs.c. * - * - custom_query_jumble: Has custom implementation in queryjumblefuncs.c. + * - custom_query_jumble: Has custom implementation in jumblefuncs.c. * * - no_copy: Does not support copyObject() at all. * diff --git a/src/include/parser/analyze.h b/src/include/parser/analyze.h index f1bd18c49f..e414b630bc 100644 --- a/src/include/parser/analyze.h +++ b/src/include/parser/analyze.h @@ -15,7 +15,7 @@ #define ANALYZE_H #include "nodes/params.h" -#include "nodes/queryjumble.h" +#include "nodes/jumble.h" #include "parser/parse_node.h" /* Hook for plugins to get control at end of parse analysis */ -- 2.47.1