From 40947834d58131c0b928d4bbe79fe885d482fccc Mon Sep 17 00:00:00 2001 From: Florents Tselai Date: Sun, 5 Apr 2026 11:52:03 +0300 Subject: [PATCH v6] Add tsmatch JSONPath operator for granular Full Text Search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces the tsmatch boolean operator to the JSONPath engine. By integrating FTS natively into path expressions, this operator allows for high-precision filtering of nested JSONB structures—solving issues with structural ambiguity and query complexity. Currently, users must choose between two suboptimal paths for searching nested JSON: 1. Imprecise Global Indexing jsonb_to_tsvector aggregates text into a flat vector. This ignores JSON boundaries, leading to false positives when the same key (e.g., "body") appears in different contexts (e.g., a "Product Description" vs. a "Customer Review"). 2. Complex SQL Workarounds Achieving 100% precision requires "exploding" the document via jsonb_array_elements and LATERAL joins. This leads to verbose SQL and high memory overhead from generating intermediate heap tuples. One of the most significant advantages of tsmatch is its ability to participate in multi-condition predicates within the same JSON object— something jsonb_to_tsvector cannot do. SELECT jsonb_path_query(doc, '$.comments[*] ? (@.user == "Alice" && @.body tsmatch "performance")'); In a flat vector, the association between "Alice" and "performance" is lost. tsmatch preserves this link by evaluating the FTS predicate in-place during path traversal. While the SQL/JSON standard (ISO/IEC 9075-2) does not explicitly define an FTS operator, tsmatch is architecturally modeled after the standard-defined like_regex. 
The operator supports optional configuration for both the dictionary and the query parser: @ tsmatch "query" [ tsconfig "regconfig" ] [ tsqparser "mode" ] Supported parser modes are: - "pl": plainto_tsquery (no operators required) - "ph": phraseto_tsquery - "w": websearch_to_tsquery - Omitted: Defaults to to_tsquery (strict mode) The implementation relies on GIN path-matching for index pruning and heap re-checks for precision. Caching is scoped to the JsonPathExecContext, ensuring 'compile-once' efficiency for the tsquery and OID lookup per execution, respecting the stability requirements of prepared statements. --- doc/src/sgml/func/func-json.sgml | 35 +++++ src/backend/utils/adt/jsonpath.c | 128 +++++++++++++++- src/backend/utils/adt/jsonpath_exec.c | 147 ++++++++++++++++++- src/backend/utils/adt/jsonpath_gram.y | 117 ++++++++++++++- src/backend/utils/adt/jsonpath_scan.l | 3 + src/include/utils/jsonpath.h | 19 +++ src/test/regress/expected/jsonb_jsonpath.out | 58 ++++++++ src/test/regress/expected/jsonpath.out | 62 ++++++++ src/test/regress/sql/jsonb_jsonpath.sql | 13 ++ src/test/regress/sql/jsonpath.sql | 19 +++ 10 files changed, 597 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/func/func-json.sgml b/doc/src/sgml/func/func-json.sgml index 4cd338fe6e3..9c8905668ac 100644 --- a/doc/src/sgml/func/func-json.sgml +++ b/doc/src/sgml/func/func-json.sgml @@ -3247,6 +3247,41 @@ ERROR: jsonpath member accessor can only be applied to an object [] + + + json_string tsmatch query + tsconfig config_name + tsqparser parser_mode + boolean + + + Tests whether the JSON string (first operand) matches the full-text search query + (second operand). The matching behavior can be customized using the optional + tsconfig and tsqparser clauses. + If tsconfig is omitted, the current session's default text search configuration + is used (see ). + The tsqparser clause determines how the query string is parsed + (see ). 
+ Valid options are "pl" (plainto_tsquery), + "ph" (phraseto_tsquery), and + "w" (websearch_to_tsquery). + If tsqparser is omitted, the query is parsed using to_tsquery. + tsconfig and tsqparser can appear in any order. + + + jsonb_path_query_array('["running", "runs", "ran", "jogging"]', '$[*] ? (@ tsmatch "run" tsconfig "english")') + ["running", "runs"] + + + jsonb_path_query_array('["fast car", "slow car", "fast and furious"]', '$[*] ? (@ tsmatch "fast & car")') + ["fast car"] + + + jsonb_path_query_array('["fast car", "slow car", "fast and furious"]', '$[*] ? (@ tsmatch "fast car" tsqparser "w")') + ["fast car"] + + + diff --git a/src/backend/utils/adt/jsonpath.c b/src/backend/utils/adt/jsonpath.c index 7bfc18c9888..e551d65dc25 100644 --- a/src/backend/utils/adt/jsonpath.c +++ b/src/backend/utils/adt/jsonpath.c @@ -351,6 +351,55 @@ flattenJsonPathParseItem(StringInfo buf, int *result, struct Node *escontext, *(int32 *) (buf->data + offs) = chld - pos; } break; + case jpiTsMatch: + { + int32 expr_off; + int32 tsconfig_off; + uint32 tsqparser_len_val = item->value.tsmatch.tsqparser ? 
item->value.tsmatch.tsqparser_len : 0;
+
+				expr_off = reserveSpaceForItemPointer(buf);
+				tsconfig_off = reserveSpaceForItemPointer(buf);
+
+				/*
+				 * Write all integers FIRST so they are naturally 4-byte
+				 * aligned
+				 */
+				appendBinaryStringInfo(buf, &item->value.tsmatch.tsquery_len, sizeof(uint32));
+				appendBinaryStringInfo(buf, &tsqparser_len_val, sizeof(uint32));
+
+				/* Now append the strings at the end */
+				appendBinaryStringInfo(buf, item->value.tsmatch.tsquery, item->value.tsmatch.tsquery_len);
+				appendStringInfoChar(buf, '\0');
+
+				if (item->value.tsmatch.tsqparser)
+				{
+					appendBinaryStringInfo(buf, item->value.tsmatch.tsqparser, tsqparser_len_val);
+					appendStringInfoChar(buf, '\0');
+				}
+
+				if (!flattenJsonPathParseItem(buf, &chld, escontext,
+											  item->value.tsmatch.doc,
+											  nestingLevel,
+											  insideArraySubscript))
+					return false;
+				*(int32 *) (buf->data + expr_off) = chld - pos;
+
+				/* TSConfig (Optional) */
+				if (item->value.tsmatch.tsconfig)
+				{
+					if (!flattenJsonPathParseItem(buf, &chld, escontext,
+												  item->value.tsmatch.tsconfig,
+												  nestingLevel,
+												  insideArraySubscript))
+						return false;
+					*(int32 *) (buf->data + tsconfig_off) = chld - pos;
+				}
+				else
+				{
+					*(int32 *) (buf->data + tsconfig_off) = 0;
+				}
+			}
+			break;
 		case jpiFilter:
 			argNestingLevel++;
 			pg_fallthrough;
@@ -766,6 +815,55 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 
 				appendStringInfoChar(buf, '"');
 			}
+
+			if (printBracketes)
+				appendStringInfoChar(buf, ')');
+			break;
+		case jpiTsMatch:
+			if (printBracketes)
+				appendStringInfoChar(buf, '(');
+
+			jspInitByBuffer(&elem, v->base, v->content.tsmatch.doc);
+			printJsonPathItem(buf, &elem, false,
+							  operationPriority(elem.type) <=
+							  operationPriority(v->type));
+
+			appendStringInfoString(buf, " tsmatch ");
+
+			escape_json_with_len(buf,
+								 v->content.tsmatch.tsquery,
+								 v->content.tsmatch.tsquery_len);
+
+			/*
+			 * The configuration name is an arbitrary string literal, so it
+			 * is JSON-escaped like the query to keep the output re-parsable.
+			 */
+			if (v->content.tsmatch.tsconfig)
+			{
+				JsonPathItem config_item;
+				int32		config_len;
+				char	   *config_str;
+
+				appendStringInfoString(buf, " tsconfig ");
+				jspInitByBuffer(&config_item, v->base, v->content.tsmatch.tsconfig);
+				config_str = jspGetString(&config_item, &config_len);
+				escape_json_with_len(buf, config_str, config_len);
+			}
+
+			if (v->content.tsmatch.tsqparser_len > 0)
+			{
+				appendStringInfoString(buf, " tsqparser ");
+				appendStringInfoChar(buf, '"');
+
+				/*
+				 * Use simple binary append since flags like "pl" don't need
+				 * JSON escaping
+				 */
+				appendBinaryStringInfo(buf,
+									   v->content.tsmatch.tsqparser,
+									   v->content.tsmatch.tsqparser_len);
+				appendStringInfoChar(buf, '"');
+			}
 
 			if (printBracketes)
 				appendStringInfoChar(buf, ')');
@@ -976,6 +1074,8 @@ jspOperationName(JsonPathItemType type)
 			return "timestamp";
 		case jpiTimestampTz:
 			return "timestamp_tz";
+		case jpiTsMatch:
+			return "tsmatch";
 		case jpiStrReplace:
 			return "replace";
 		case jpiStrLower:
@@ -1158,6 +1258,27 @@ jspInitByBuffer(JsonPathItem *v, char *base, int32 pos)
 			read_int32(v->content.like_regex.patternlen, base, pos);
 			v->content.like_regex.pattern = base + pos;
 			break;
+		case jpiTsMatch:
+			/* Fixed-width fields first, in the order they were flattened */
+			read_int32(v->content.tsmatch.doc, base, pos);
+			read_int32(v->content.tsmatch.tsconfig, base, pos);
+			read_int32(v->content.tsmatch.tsquery_len, base, pos);
+			read_int32(v->content.tsmatch.tsqparser_len, base, pos);
+
+			/* Set pointers to the strings in the buffer */
+			v->content.tsmatch.tsquery = base + pos;
+			pos += v->content.tsmatch.tsquery_len + 1;
+
+			if (v->content.tsmatch.tsqparser_len > 0)
+			{
+				v->content.tsmatch.tsqparser = base + pos;
+				pos += v->content.tsmatch.tsqparser_len + 1;
+			}
+			else
+			{
+				v->content.tsmatch.tsqparser = NULL;
+			}
+			break;
 		default:
 			elog(ERROR, "unrecognized jsonpath item type: %d", v->type);
 	}
@@ -1231,6 +1352,7 @@ jspGetNext(JsonPathItem *v, JsonPathItem *a)
 			   v->type == jpiLast ||
 			   v->type == jpiStartsWith ||
 			   v->type == jpiLikeRegex ||
+			   v->type ==
jpiTsMatch ||
 			   v->type == jpiBigint ||
 			   v->type == jpiBoolean ||
 			   v->type == jpiDate ||
@@ -1575,7 +1697,17 @@ jspIsMutableWalker(JsonPathItem *jpi, struct JsonPathMutableContext *cxt)
 				jspInitByBuffer(&arg, jpi->base, jpi->content.like_regex.expr);
 				jspIsMutableWalker(&arg, cxt);
 				break;
+			case jpiTsMatch:
+				Assert(status == jpdsNonDateTime);
+
+				/*
+				 * The text search configuration is resolved while the path
+				 * runs (by name, or from the session default when tsconfig
+				 * is omitted), so the result is not a pure function of the
+				 * path and its input: report the path as mutable.
+				 */
+				cxt->mutable = true;
+				break;
 
 				/* literals */
 			case jpiNull:
 			case jpiString:
diff --git a/src/backend/utils/adt/jsonpath_exec.c b/src/backend/utils/adt/jsonpath_exec.c
index 770840a0611..de8937fdb08 100644
--- a/src/backend/utils/adt/jsonpath_exec.c
+++ b/src/backend/utils/adt/jsonpath_exec.c
@@ -59,13 +59,17 @@
 
 #include "postgres.h"
 
+#include "catalog/namespace.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_type.h"
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "nodes/miscnodes.h"
 #include "nodes/nodeFuncs.h"
+#include "nodes/pg_list.h"
 #include "regex/regex.h"
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
 #include "utils/date.h"
 #include "utils/datetime.h"
@@ -74,6 +78,7 @@
 #include "utils/json.h"
 #include "utils/jsonpath.h"
 #include "utils/memutils.h"
+#include "utils/regproc.h"
 #include "utils/timestamp.h"
 
 /*
@@ -114,6 +119,7 @@ typedef struct JsonPathExecContext
 	bool		throwErrors;	/* with "false" all suppressible errors are
 								 * suppressed */
 	bool		useTz;
+	List	   *tsmatch_cache;	/* Persists compiled FTS queries */
 } JsonPathExecContext;
 
 /* Context for LIKE_REGEX execution. */
@@ -123,6 +129,14 @@ typedef struct JsonLikeRegexContext
 	int			cflags;
 } JsonLikeRegexContext;
 
+/* Context for tsmatch execution. */
+typedef struct JsonTsMatchCacheEntry
+{
+	void	   *jsp_ptr;		/* Cache key: pointer to the AST node */
+	Datum		queryDatum;		/* Cached compiled query */
+	Oid			tsconfigId;		/* Cached text search configuration OID */
+} JsonTsMatchCacheEntry;
+
 /* Result of jsonpath predicate evaluation */
 typedef enum JsonPathBool
 {
@@ -335,6 +349,7 @@ static JsonPathExecResult executeKeyValueMethod(JsonPathExecContext *cxt,
 												JsonPathItem *jsp, JsonbValue *jb, JsonValueList *found);
 static JsonPathExecResult appendBoolResult(JsonPathExecContext *cxt,
 										   JsonPathItem *jsp, JsonValueList *found, JsonPathBool res);
+static JsonPathBool executeTsMatch(JsonPathItem *jsp, JsonbValue *str, JsonbValue *rarg, void *param);
 static void getJsonPathItem(JsonPathExecContext *cxt, JsonPathItem *item,
 							JsonbValue *value);
 static JsonbValue *GetJsonPathVar(void *cxt, char *varName, int varNameLen,
@@ -740,6 +755,7 @@ executeJsonPath(JsonPath *path, void *vars, JsonPathGetVarCallback getVar,
 	cxt.innermostArraySize = -1;
 	cxt.throwErrors = throwErrors;
 	cxt.useTz = useTz;
+	cxt.tsmatch_cache = NIL;
 
 	if (jspStrictAbsenceOfErrors(&cxt) && !result)
 	{
@@ -840,6 +856,7 @@ executeItemOptUnwrapTarget(JsonPathExecContext *cxt, JsonPathItem *jsp,
 		case jpiExists:
 		case jpiStartsWith:
 		case jpiLikeRegex:
+		case jpiTsMatch:
 			{
 				JsonPathBool st = executeBoolItem(cxt, jsp, jb, true);
 
@@ -1914,6 +1931,18 @@ executeBoolItem(JsonPathExecContext *cxt, JsonPathItem *jsp,
 				return executePredicate(cxt, jsp, &larg, NULL, jb, false,
 										executeLikeRegex, &lrcxt);
 			}
+		case jpiTsMatch:
+			{
+				jspInitByBuffer(&larg, jsp->base,
+								jsp->content.tsmatch.doc);
+
+				/*
+				 * Pass 'cxt' as the param so executeTsMatch can access the
+				 * cache list
+				 */
+				return executePredicate(cxt, jsp, &larg, NULL, jb, false,
+										executeTsMatch, cxt);
+			}
 
 		case jpiExists:
 			jspGetArg(jsp, &larg);
@@ -1952,7 +1981,6 @@ executeBoolItem(JsonPathExecContext *cxt, JsonPathItem *jsp,
 
 				return res == jperOk ?
jpbTrue : jpbFalse;
 			}
-
 		default:
 			elog(ERROR, "invalid boolean jsonpath item type: %d", jsp->type);
 			return jpbUnknown;
@@ -3186,6 +3214,118 @@ executeKeyValueMethod(JsonPathExecContext *cxt, JsonPathItem *jsp,
 	return res;
 }
 
+/*
+ * TSMATCH predicate callback, invoked through executePredicate().
+ *
+ * 'str' is the candidate JSON item; anything that is not a string yields
+ * jpbUnknown.  'rarg' is unused because the query is stored in the jsonpath
+ * item itself.  'param' is the JsonPathExecContext, whose tsmatch_cache keeps
+ * one compiled tsquery per tsmatch node so that the query text is parsed and
+ * the configuration resolved only once per execution.
+ */
+static JsonPathBool
+executeTsMatch(JsonPathItem *jsp, JsonbValue *str, JsonbValue *rarg,
+			   void *param)
+{
+	JsonPathExecContext *cxt = (JsonPathExecContext *) param;
+	JsonTsMatchCacheEntry *cache = NULL;
+	void	   *cache_key;
+	ListCell   *lc;
+	text	   *doc_text;
+	Datum		tsvector_datum;
+	bool		match;
+
+	if (!(str = getScalar(str, jbvString)))
+		return jpbUnknown;
+
+	/*
+	 * Identify the node by the address of its query string.  That pointer
+	 * refers into the flattened jsonpath (it is set by jspInitByBuffer), so
+	 * it is unique per tsmatch node and stable for the whole execution.  The
+	 * address of 'jsp' itself is not usable as a key: callers hand over
+	 * JsonPathItem structs held in local variables, so two different nodes
+	 * can be presented at the same address and would share a cache entry.
+	 */
+	cache_key = (void *) jsp->content.tsmatch.tsquery;
+
+	foreach(lc, cxt->tsmatch_cache)
+	{
+		JsonTsMatchCacheEntry *entry = (JsonTsMatchCacheEntry *) lfirst(lc);
+
+		if (entry->jsp_ptr == cache_key)
+		{
+			cache = entry;
+			break;
+		}
+	}
+
+	/* First evaluation of this node: resolve, compile, and remember it */
+	if (cache == NULL)
+	{
+		PGFunction	to_query = to_tsquery_byid; /* default when no tsqparser */
+		text	   *query_text;
+		char	   *parser_mode = jsp->content.tsmatch.tsqparser;
+		uint32		parser_len = jsp->content.tsmatch.tsqparser_len;
+
+		cache = palloc0(sizeof(JsonTsMatchCacheEntry));
+		cache->jsp_ptr = cache_key;
+
+		if (jsp->content.tsmatch.tsconfig != 0)
+		{
+			JsonPathItem config_item;
+			int32		config_len;
+			char	   *config_str;
+			char	   *safe_config_str;
+
+			jspInitByBuffer(&config_item, jsp->base, jsp->content.tsmatch.tsconfig);
+			config_str = jspGetString(&config_item, &config_len);
+
+			/* Null-terminate for safe catalog lookup */
+			safe_config_str = pnstrdup(config_str, config_len);
+			cache->tsconfigId = get_ts_config_oid(stringToQualifiedNameList(safe_config_str, NULL), false);
+			pfree(safe_config_str);
+		}
+		else
+			cache->tsconfigId = getTSCurrentConfig(true);
+
+		/* The grammar validates the mode; fail cleanly if that ever changes */
+		if (parser_len > 0)
+		{
+			if (parser_len == 2 && pg_strncasecmp(parser_mode, "pl", 2) == 0)
+				to_query = plainto_tsquery_byid;
+			else if (parser_len == 2 && pg_strncasecmp(parser_mode, "ph", 2) == 0)
+				to_query = phraseto_tsquery_byid;
+			else if (parser_len == 1 && pg_strncasecmp(parser_mode, "w", 1) == 0)
+				to_query = websearch_to_tsquery_byid;
+			else
+				elog(ERROR, "unrecognized tsqparser mode: \"%.*s\"",
+					 (int) parser_len, parser_mode);
+		}
+
+		query_text = cstring_to_text_with_len(jsp->content.tsmatch.tsquery,
+											  jsp->content.tsmatch.tsquery_len);
+		cache->queryDatum = DirectFunctionCall2(to_query,
+												ObjectIdGetDatum(cache->tsconfigId),
+												PointerGetDatum(query_text));
+
+		cxt->tsmatch_cache = lappend(cxt->tsmatch_cache, cache);
+	}
+
+	/* Evaluate the current item against the cached query */
+	doc_text = cstring_to_text_with_len(str->val.string.val,
+										str->val.string.len);
+
+	tsvector_datum = DirectFunctionCall2(to_tsvector_byid,
+										 ObjectIdGetDatum(cache->tsconfigId),
+										 PointerGetDatum(doc_text));
+
+	match = DatumGetBool(DirectFunctionCall2(ts_match_vq,
+											 tsvector_datum,
+											 cache->queryDatum));
+
+	return match ? jpbTrue : jpbFalse;
+}
+
 /*
  * Convert boolean execution status 'res' to a boolean JSON item and execute
  * next jsonpath.
diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y
index f826697d098..9f3aa0cbe14 100644
--- a/src/backend/utils/adt/jsonpath_gram.y
+++ b/src/backend/utils/adt/jsonpath_gram.y
@@ -43,7 +43,12 @@ static bool makeItemLikeRegex(JsonPathParseItem *expr,
 							  JsonPathString *flags,
 							  JsonPathParseItem ** result,
 							  struct Node *escontext);
-
+static bool makeItemTsMatch(JsonPathParseItem *doc,
+							JsonPathString *tsquery,
+							JsonPathString *tsconfig,
+							JsonPathString *tsquery_parser,
+							JsonPathParseItem ** result,
+							struct Node *escontext);
 /*
  * Bison doesn't allocate anything that needs to live across parser calls,
  * so we can easily have it use palloc instead of malloc.  This prevents
@@ -73,6 +78,13 @@ static bool makeItemLikeRegex(JsonPathParseItem *expr,
 	JsonPathParseItem *value;
 	JsonPathParseResult *result;
 	JsonPathItemType optype;
+	struct
+	{
+		bool		has_tsconfig;
+		JsonPathString tsconfig;
+		bool		has_tsqparser;
+		JsonPathString tsqparser;
+	}			tsmatch_opts;
 	bool		boolean;
 	int			integer;
 }
@@ -81,7 +93,7 @@ static bool makeItemLikeRegex(JsonPathParseItem *expr,
 %token	<str>		IDENT_P STRING_P NUMERIC_P INT_P VARIABLE_P
 %token	<str>		OR_P AND_P NOT_P
 %token	<str>		LESS_P LESSEQUAL_P EQUAL_P NOTEQUAL_P GREATEREQUAL_P GREATER_P
-%token	<str>		ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P LIKE_REGEX_P FLAG_P
+%token	<str>		ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P LIKE_REGEX_P FLAG_P TSMATCH_P TSCONFIG_P TSQUERYPARSER_P
 %token	<str>		ABS_P SIZE_P TYPE_P FLOOR_P DOUBLE_P CEILING_P KEYVALUE_P
 %token	<str>		DATETIME_P
 %token	<str>		BIGINT_P BOOLEAN_P DATE_P DECIMAL_P INTEGER_P NUMBER_P
@@ -109,6 +121,8 @@ static bool makeItemLikeRegex(JsonPathParseItem *expr,
 
 %type	<integer>	any_level
 
+%type	<tsmatch_opts>	tsmatch_opts
+
 %left	OR_P
 %left	AND_P
 %right	NOT_P
@@ -189,6 +203,17 @@ predicate:
 			YYABORT;
 		$$ = jppitem;
 	}
+	| expr TSMATCH_P STRING_P tsmatch_opts
+	{
+		JsonPathParseItem *jppitem;
+
+		if (!makeItemTsMatch($1, &$3,
+							 $4.has_tsconfig ? &$4.tsconfig : NULL,
+							 $4.has_tsqparser ? &$4.tsqparser : NULL,
+							 &jppitem, escontext))
+			YYABORT;
+		$$ = jppitem;
+	}
 	;
 
 starts_with_initial:
@@ -337,6 +362,40 @@ str_str_args:
 	str_elem ',' str_elem			{ $$ = list_make2($1, $3); }
 	;
 
+tsmatch_opts:
+	/* EMPTY */
+	{
+		$$.has_tsconfig = false;
+		$$.has_tsqparser = false;
+	}
+	| tsmatch_opts TSCONFIG_P STRING_P
+	{
+		/* report through escontext so soft-error callers are not aborted */
+		if ($1.has_tsconfig)
+		{
+			errsave(escontext,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("duplicate tsconfig option in tsmatch")));
+			YYABORT;
+		}
+		$$ = $1;
+		$$.has_tsconfig = true;
+		$$.tsconfig = $3;
+	}
+	| tsmatch_opts TSQUERYPARSER_P STRING_P
+	{
+		if ($1.has_tsqparser)
+		{
+			errsave(escontext,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("duplicate tsqparser option in tsmatch")));
+			YYABORT;
+		}
+		$$ = $1;
+		$$.has_tsqparser = true;
+		$$.tsqparser = $3;
+	}
+	;
 key:
 	key_name						{ $$ = makeItemKey(&$1); }
 	;
@@ -377,6 +436,9 @@ key_name:
 	| TIME_TZ_P
 	| TIMESTAMP_P
 	| TIMESTAMP_TZ_P
+	| TSCONFIG_P
+	| TSMATCH_P
+	| TSQUERYPARSER_P
 	| STR_LOWER_P
 	| STR_UPPER_P
 	| STR_INITCAP_P
@@ -715,3 +777,67 @@ jspConvertRegexFlags(uint32 xflags, int *result, struct Node *escontext)
 
 	return true;
 }
+
+/*
+ * Build a jpiTsMatch item for "doc tsmatch tsquery [tsconfig ...] [tsqparser
+ * ...]".  tsconfig and tsquery_parser are NULL when the clause is absent.
+ * An unsupported tsqparser value is reported through escontext and false is
+ * returned; otherwise *result is set and true is returned.
+ */
+static bool
+makeItemTsMatch(JsonPathParseItem *doc,
+				JsonPathString *tsquery,
+				JsonPathString *tsconfig,
+				JsonPathString *tsquery_parser,
+				JsonPathParseItem **result,
+				struct Node *escontext)
+{
+	JsonPathParseItem *v = makeItemType(jpiTsMatch);
+
+	v->value.tsmatch.doc = doc;
+
+	v->value.tsmatch.tsquery = tsquery->val;
+	v->value.tsmatch.tsquery_len = tsquery->len;
+
+	/* Handle the Configuration (Stored as a Node) */
+	if (tsconfig)
+	{
+		JsonPathParseItem *conf = makeItemType(jpiString);
+		conf->value.string.val = tsconfig->val;
+		conf->value.string.len = tsconfig->len;
+		v->value.tsmatch.tsconfig = conf;
+	}
+	else
+	{
+		v->value.tsmatch.tsconfig = NULL;
+	}
+
+	/* Handle the TSQuery Parser Flag */
+	if (tsquery_parser)
+	{
+		/* Check for "pl", "ph", "w" here to throw a syntax error immediately */
+		if (!(tsquery_parser->len == 2 && pg_strncasecmp(tsquery_parser->val, "pl", 2) == 0) &&
+			!(tsquery_parser->len == 2 && pg_strncasecmp(tsquery_parser->val, "ph", 2) == 0) &&
+			!(tsquery_parser->len == 1 && pg_strncasecmp(tsquery_parser->val, "w", 1) == 0))
+		{
+			errsave(escontext,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("invalid tsqparser value: \"%.*s\"",
+							(int) tsquery_parser->len, tsquery_parser->val),
+					 errhint("Valid values are \"pl\", \"ph\", and \"w\".")));
+			return false;
+		}
+
+		v->value.tsmatch.tsqparser = tsquery_parser->val;
+		v->value.tsmatch.tsqparser_len = tsquery_parser->len;
+	}
+	else
+	{
+		v->value.tsmatch.tsqparser = NULL;
+		v->value.tsmatch.tsqparser_len = 0;
+	}
+
+	/* Success */
+	*result = v;
+	return true;
+}
diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l
index e4fadcc2e69..92b3e9be926 100644
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@@ -434,10 +434,13 @@ static const JsonPathKeyword keywords[] = {
 	{7, false, INTEGER_P, "integer"},
 	{7, false, STR_REPLACE_P, "replace"},
 	{7, false, TIME_TZ_P, "time_tz"},
+	{7, false, TSMATCH_P, "tsmatch"},
 	{7, false, UNKNOWN_P, "unknown"},
 	{8, false, DATETIME_P, "datetime"},
 	{8, false, KEYVALUE_P, "keyvalue"},
+	{8, false, TSCONFIG_P, "tsconfig"},
 	{9, false, TIMESTAMP_P, "timestamp"},
+	{9, false, TSQUERYPARSER_P, "tsqparser"},
 	{10, false, LIKE_REGEX_P, "like_regex"},
 	{10, false, STR_SPLIT_PART_P, "split_part"},
 	{12, false, TIMESTAMP_TZ_P, "timestamp_tz"},
diff --git a/src/include/utils/jsonpath.h b/src/include/utils/jsonpath.h
index 8d27206e242..89753bf8bc7 100644
--- a/src/include/utils/jsonpath.h
+++ b/src/include/utils/jsonpath.h
@@ -104,6 +104,7 @@ typedef enum JsonPathItemType
 	jpiLast,					/* LAST array subscript */
 	jpiStartsWith,				/* STARTS WITH predicate */
 	jpiLikeRegex,				/* LIKE_REGEX predicate */
+	jpiTsMatch,					/* TSMATCH predicate; XXX(review): renumbers jpiBigint and later, which look stored in jsonpath values -- append at end? */
 	jpiBigint,					/* .bigint() item method */
 	jpiBoolean,					/* .boolean() item method */
 	jpiDate,					/* .date() item method */
@@ -196,6 +197,15 @@ typedef struct JsonPathItem
int32 patternlen; uint32 flags; } like_regex; + struct + { + int32 doc; + char *tsquery; + uint32 tsquery_len; + int32 tsconfig; + char *tsqparser; + uint32 tsqparser_len; + } tsmatch; } content; } JsonPathItem; @@ -274,6 +284,15 @@ struct JsonPathParseItem uint32 len; char *val; /* could not be not null-terminated */ } string; + struct + { + JsonPathParseItem *doc; + char *tsquery; + uint32 tsquery_len; + JsonPathParseItem *tsconfig; + char *tsqparser; + uint32 tsqparser_len; + } tsmatch; } value; }; diff --git a/src/test/regress/expected/jsonb_jsonpath.out b/src/test/regress/expected/jsonb_jsonpath.out index afa6c4cb529..ac78becc305 100644 --- a/src/test/regress/expected/jsonb_jsonpath.out +++ b/src/test/regress/expected/jsonb_jsonpath.out @@ -4891,3 +4891,61 @@ ORDER BY s1.num, s2.num; {"s": "B"} | {"s": "B"} | false | true | true | true | false (144 rows) +select jsonb_path_query('[null, 1, "running", "runs", "ran", "run", "runner", "jogging"]', 'lax $[*] ? (@ tsmatch "fly" tsconfig "english")'); + jsonb_path_query +------------------ +(0 rows) + +select jsonb_path_query('[null, 1, "running", "runs", "ran", "run", "runner", "jogging"]', 'lax $[*] ? (@ tsmatch "run" tsconfig "english")'); + jsonb_path_query +------------------ + "running" + "runs" + "run" +(3 rows) + +select jsonb_path_query('[null, 1, "running", "runs", "ran", "run", "runner", "jogging"]', 'lax $[*] ? (@ tsmatch "run" tsconfig "simple")'); + jsonb_path_query +------------------ + "run" +(1 row) + +select jsonb_path_query('[null, 1, "PostgreSQL", "postgres", "POSTGRES", "database"]', 'lax $[*] ? (@ tsmatch "Postgres" tsconfig "english")'); + jsonb_path_query +------------------ + "postgres" + "POSTGRES" +(2 rows) + +select jsonb_path_query('[null, 1, "PostgreSQL", "postgres", "POSTGRES", "database"]', 'lax $[*] ? 
(@ tsmatch "Postgres" tsconfig "simple")'); + jsonb_path_query +------------------ + "postgres" + "POSTGRES" +(2 rows) + +-- in the default tsqparser (to_tsquery) spaces are not allowed, so this should fail for syntax +select jsonb_path_query('["fast car", "super fast car", "fast and furious", "slow car"]', 'lax $[*] ? (@ tsmatch "fast car" tsconfig "english")'); +ERROR: syntax error in tsquery: "fast car" +-- if we specify "w" however it should be ok +select jsonb_path_query('["fast car", "super fast car", "fast and furious", "slow car"]', 'lax $[*] ? (@ tsmatch "fast car" tsconfig "english" tsqparser "w")'); + jsonb_path_query +------------------ + "fast car" + "super fast car" +(2 rows) + +-- it should also be ok if we change to a valid to_tsquery +select jsonb_path_query('["fast car", "super fast car", "fast and furious", "slow car"]', 'lax $[*] ? (@ tsmatch "fast & car" tsconfig "english")'); + jsonb_path_query +------------------ + "fast car" + "super fast car" +(2 rows) + +select jsonb_path_query('["fat cat", "cat fat", "fat rats"]', 'lax $[*] ? (@ tsmatch "fat & rat" tsconfig "english")'); + jsonb_path_query +------------------ + "fat rats" +(1 row) + diff --git a/src/test/regress/expected/jsonpath.out b/src/test/regress/expected/jsonpath.out index ea971e79854..ba437085405 100644 --- a/src/test/regress/expected/jsonpath.out +++ b/src/test/regress/expected/jsonpath.out @@ -1480,3 +1480,65 @@ FROM unnest(ARRAY['$ ? (@ like_regex "pattern" flag "smixq")'::text, 1a | f | 42601 | trailing junk after numeric literal at or near "1a" of jsonpath input | | (5 rows) +-- tsmatch (Full Text Search) +-- basic success +select '$ ? (@ tsmatch "simple")'::jsonpath; + jsonpath +------------------------ + $?(@ tsmatch "simple") +(1 row) + +select '$ ? (@ tsmatch "running" tsconfig "english")'::jsonpath; + jsonpath +-------------------------------------------- + $?(@ tsmatch "running" tsconfig "english") +(1 row) + +-- w/out tsconfig and tsqparser +select '$ ? 
(@ tsmatch "fast & furious" tsconfig "simple")'::jsonpath; + jsonpath +-------------------------------------------------- + $?(@ tsmatch "fast & furious" tsconfig "simple") +(1 row) + +select '$ ? (@ tsmatch "fast & furious" tsconfig "simple" tsqparser "w")'::jsonpath; + jsonpath +---------------------------------------------------------------- + $?(@ tsmatch "fast & furious" tsconfig "simple" tsqparser "w") +(1 row) + +-- tsconfig and tsqparser can appear in any order +select '$ ? (@ tsmatch "fast & furious" tsqparser "w" tsconfig "simple" )'::jsonpath; + jsonpath +---------------------------------------------------------------- + $?(@ tsmatch "fast & furious" tsconfig "simple" tsqparser "w") +(1 row) + +select '$ ? (@ tsmatch "fast & furious" tsqparser "w")'::jsonpath; + jsonpath +---------------------------------------------- + $?(@ tsmatch "fast & furious" tsqparser "w") +(1 row) + +select '$[*] ? (@.title tsmatch "god" && @.rating > 5)'::jsonpath; + jsonpath +-------------------------------------------------- + $[*]?(@."title" tsmatch "god" && @."rating" > 5) +(1 row) + +select '$ ? (@ tsmatch $pattern)'::jsonpath; +ERROR: syntax error at or near "$pattern" of jsonpath input +LINE 1: select '$ ? (@ tsmatch $pattern)'::jsonpath; + ^ +-- only string literals (no variables) are allowed for tsquery +select '$ ? (@ tsmatch $var tsconfig "english")'::jsonpath; +ERROR: syntax error at or near "$var" of jsonpath input +LINE 1: select '$ ? (@ tsmatch $var tsconfig "english")'::jsonpath; + ^ +-- if a tsconfig doesn't exist it should parse nonetheless (executor will fail it) +select '$ ? 
(@ tsmatch "running" tsconfig "wrongconfig")'::jsonpath; + jsonpath +------------------------------------------------ + $?(@ tsmatch "running" tsconfig "wrongconfig") +(1 row) + diff --git a/src/test/regress/sql/jsonb_jsonpath.sql b/src/test/regress/sql/jsonb_jsonpath.sql index d3a38c57791..8698021ba2b 100644 --- a/src/test/regress/sql/jsonb_jsonpath.sql +++ b/src/test/regress/sql/jsonb_jsonpath.sql @@ -1253,3 +1253,16 @@ SELECT jsonb_path_query_first(s1.j, '$.s > $s', vars => s2.j) gt FROM str s1, str s2 ORDER BY s1.num, s2.num; + +select jsonb_path_query('[null, 1, "running", "runs", "ran", "run", "runner", "jogging"]', 'lax $[*] ? (@ tsmatch "fly" tsconfig "english")'); +select jsonb_path_query('[null, 1, "running", "runs", "ran", "run", "runner", "jogging"]', 'lax $[*] ? (@ tsmatch "run" tsconfig "english")'); +select jsonb_path_query('[null, 1, "running", "runs", "ran", "run", "runner", "jogging"]', 'lax $[*] ? (@ tsmatch "run" tsconfig "simple")'); +select jsonb_path_query('[null, 1, "PostgreSQL", "postgres", "POSTGRES", "database"]', 'lax $[*] ? (@ tsmatch "Postgres" tsconfig "english")'); +select jsonb_path_query('[null, 1, "PostgreSQL", "postgres", "POSTGRES", "database"]', 'lax $[*] ? (@ tsmatch "Postgres" tsconfig "simple")'); +-- in the default tsqparser (to_tsquery) spaces are not allowed, so this should fail for syntax +select jsonb_path_query('["fast car", "super fast car", "fast and furious", "slow car"]', 'lax $[*] ? (@ tsmatch "fast car" tsconfig "english")'); +-- if we specify "w" however it should be ok +select jsonb_path_query('["fast car", "super fast car", "fast and furious", "slow car"]', 'lax $[*] ? (@ tsmatch "fast car" tsconfig "english" tsqparser "w")'); +-- it should also be ok if we change to a valid to_tsquery +select jsonb_path_query('["fast car", "super fast car", "fast and furious", "slow car"]', 'lax $[*] ? (@ tsmatch "fast & car" tsconfig "english")'); +select jsonb_path_query('["fat cat", "cat fat", "fat rats"]', 'lax $[*] ? 
(@ tsmatch "fat & rat" tsconfig "english")'); diff --git a/src/test/regress/sql/jsonpath.sql b/src/test/regress/sql/jsonpath.sql index 44178d8b45a..d111cee6264 100644 --- a/src/test/regress/sql/jsonpath.sql +++ b/src/test/regress/sql/jsonpath.sql @@ -306,3 +306,22 @@ FROM unnest(ARRAY['$ ? (@ like_regex "pattern" flag "smixq")'::text, '00', '1a']) str, LATERAL pg_input_error_info(str, 'jsonpath') as errinfo; + +-- tsmatch (Full Text Search) + +-- basic success +select '$ ? (@ tsmatch "simple")'::jsonpath; +select '$ ? (@ tsmatch "running" tsconfig "english")'::jsonpath; +-- w/out tsconfig and tsqparser +select '$ ? (@ tsmatch "fast & furious" tsconfig "simple")'::jsonpath; +select '$ ? (@ tsmatch "fast & furious" tsconfig "simple" tsqparser "w")'::jsonpath; +-- tsconfig and tsqparser can appear in any order +select '$ ? (@ tsmatch "fast & furious" tsqparser "w" tsconfig "simple" )'::jsonpath; +select '$ ? (@ tsmatch "fast & furious" tsqparser "w")'::jsonpath; +select '$[*] ? (@.title tsmatch "god" && @.rating > 5)'::jsonpath; +select '$ ? (@ tsmatch $pattern)'::jsonpath; + +-- only string literals (no variables) are allowed for tsquery +select '$ ? (@ tsmatch $var tsconfig "english")'::jsonpath; +-- if a tsconfig doesn't exist it should parse nonetheless (executor will fail it) +select '$ ? (@ tsmatch "running" tsconfig "wrongconfig")'::jsonpath; -- 2.53.0