From 005ab91d885633489036c6c86ff15285e48e818b Mon Sep 17 00:00:00 2001
From: Marcos Magueta
Date: Wed, 21 Jan 2026 17:11:52 -0300
Subject: [PATCH 4/5] Add XMLVALIDATE function for XML schema validation

---
 src/backend/executor/execExprInterp.c |  27 +++++
 src/backend/parser/parse_expr.c       |  43 ++++++++
 src/backend/parser/parse_target.c     |   3 +
 src/backend/utils/adt/xml.c           | 145 +++++++++++++++++++++++++-
 src/include/nodes/primnodes.h         |   1 +
 src/include/utils/xml.h               |   1 +
 6 files changed, 219 insertions(+), 1 deletion(-)

diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index a7a5ac1e83b..2b416a4d903 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -4626,6 +4626,33 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 			}
 			break;
 
+		case IS_XMLVALIDATE:
+			{
+				Datum	   *argvalue = op->d.xmlexpr.argvalue;
+				bool	   *argnull = op->d.xmlexpr.argnull;
+				xmltype    *data;
+				Oid			schema_oid;
+				xmltype    *result;
+
+				/* Two arguments: XML data and schema OID */
+				Assert(list_length(xexpr->args) == 2);
+
+				if (argnull[0] || argnull[1])
+				{
+					*op->resnull = true;
+					return;
+				}
+
+				data = DatumGetXmlP(argvalue[0]);
+				schema_oid = DatumGetObjectId(argvalue[1]);
+
+				result = xmlvalidate_schema(data, schema_oid);
+
+				*op->resvalue = PointerGetDatum(result);
+				*op->resnull = false;
+			}
+			break;
+
 		case IS_DOCUMENT:
 			{
 				Datum	   *argvalue = op->d.xmlexpr.argvalue;
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 56826db4c26..fd399b14d9a 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -16,6 +16,7 @@
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "catalog/namespace.h"
 #include "catalog/pg_aggregate.h"
 #include "catalog/pg_type.h"
 #include "miscadmin.h"
@@ -2361,6 +2362,7 @@ transformXmlExpr(ParseState *pstate, XmlExpr *x)
 	XmlExpr    *newx;
 	ListCell   *lc;
 	int			i;
+	Oid			xmlvalidate_schema_oid = InvalidOid;
 
 	newx = makeNode(XmlExpr);
 	newx->op = x->op;
@@ -2373,6 +2375,22 @@ transformXmlExpr(ParseState *pstate, XmlExpr *x)
 	newx->typmod = -1;
 	newx->location = x->location;
 
+	/*
+	 * XMLVALIDATE stores the schema name list in named_args, not ResTargets.
+	 * Extract it before processing named arguments.
+	 */
+	if (x->op == IS_XMLVALIDATE && x->named_args != NIL)
+	{
+		List	   *schema_name_list;
+
+		schema_name_list = x->named_args;
+		xmlvalidate_schema_oid = get_xmlschema_oid(schema_name_list, false);
+		/* Preserve schema name for deparsing */
+		newx->name = NameListToString(schema_name_list);
+		/* Clear to avoid processing as ResTargets */
+		x->named_args = NIL;
+	}
+
 	/*
 	 * gram.y built the named args as a list of ResTarget.  Transform each,
 	 * and break the names out as a separate list.
@@ -2472,6 +2490,11 @@ transformXmlExpr(ParseState *pstate, XmlExpr *x)
 				/* not handled here */
 				Assert(false);
 				break;
+			case IS_XMLVALIDATE:
+				/* First argument is the XML data */
+				newe = coerce_to_specific_type(pstate, newe, XMLOID,
+											   "XMLVALIDATE");
+				break;
 			case IS_DOCUMENT:
 				newe = coerce_to_specific_type(pstate, newe, XMLOID,
 											   "IS DOCUMENT");
@@ -2481,6 +2504,26 @@ transformXmlExpr(ParseState *pstate, XmlExpr *x)
 		i++;
 	}
 
+	/* For XMLVALIDATE, add the schema OID as second argument */
+	if (x->op == IS_XMLVALIDATE)
+	{
+		Const	   *schema_oid_const;
+
+		Assert(OidIsValid(xmlvalidate_schema_oid));
+
+		schema_oid_const = makeConst(OIDOID,
+									 -1,
+									 InvalidOid,
+									 sizeof(Oid),
+									 ObjectIdGetDatum(xmlvalidate_schema_oid),
+									 false,
+									 true);
+		newx->args = lappend(newx->args, schema_oid_const);
+
+		/* Return type is XML */
+		newx->type = XMLOID;
+	}
+
 	return (Node *) newx;
 }
 
diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c
index b5a2f915b67..b380bb39eb1 100644
--- a/src/backend/parser/parse_target.c
+++ b/src/backend/parser/parse_target.c
@@ -1976,6 +1976,9 @@ FigureColnameInternal(Node *node, char **name)
 				case IS_XMLSERIALIZE:
 					*name = "xmlserialize";
 					return 2;
+				case IS_XMLVALIDATE:
+					*name = "xmlvalidate";
+					return 2;
 				case IS_DOCUMENT:
 					/* nothing */
 					break;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index f69dc68286c..d57c3c07f91 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -58,6 +58,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/xmlschemas.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -84,6 +85,7 @@
 #include "catalog/namespace.h"
 #include "catalog/pg_class.h"
 #include "catalog/pg_type.h"
+#include "catalog/pg_xmlschema.h"
 #include "executor/spi.h"
 #include "executor/tablefunc.h"
 #include "fmgr.h"
@@ -94,6 +96,7 @@
 #include "nodes/execnodes.h"
 #include "nodes/miscnodes.h"
 #include "nodes/nodeFuncs.h"
+#include "utils/acl.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/date.h"
@@ -1158,10 +1161,150 @@ xmlvalidate(PG_FUNCTION_ARGS)
 {
 	ereport(ERROR,
 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("xmlvalidate is not implemented")));
+			 errmsg("xmlvalidate is not implemented against generalized schema definitions")));
 	return 0;
 }
 
+/*
+ * xmlvalidate_schema - validate XML document against a registered XML Schema
+ *
+ * "data" is parsed as an XML document and checked against the schema text
+ * stored in the pg_xmlschema entry identified by "schema_oid".  The current
+ * user must have USAGE privilege on that schema object.
+ *
+ * Returns "data" unchanged when the document is valid.  Raises an error if
+ * the privilege check fails, if the document or the schema cannot be parsed,
+ * or if the document does not conform to the schema.
+ */
+xmltype *
+xmlvalidate_schema(xmltype *data, Oid schema_oid)
+{
+#ifdef USE_LIBXML
+	HeapTuple	tuple;
+	Datum		schema_datum;
+	bool		isnull;
+	xmltype    *schema_xml;
+	char	   *schemastr;
+	volatile xmlDocPtr doc = NULL;
+	volatile xmlSchemaParserCtxtPtr schema_parser_ctxt = NULL;
+	volatile xmlSchemaPtr schema_ptr = NULL;
+	volatile xmlSchemaValidCtxtPtr valid_ctxt = NULL;
+	int			result;
+	PgXmlErrorContext *xmlerrcxt;
+	AclResult	aclresult;
+
+	/* Check usage permission first */
+	aclresult = object_aclcheck(XmlSchemaRelationId, schema_oid,
+								GetUserId(), ACL_USAGE);
+	if (aclresult != ACLCHECK_OK)
+	{
+		/* Fetch tuple only to get name for the error message */
+		Form_pg_xmlschema schema_form;
+		char	   *schema_name;
+
+		tuple = SearchSysCache1(XMLSCHEMAOID, ObjectIdGetDatum(schema_oid));
+		if (!HeapTupleIsValid(tuple))
+			elog(ERROR, "cache lookup failed for XML schema %u", schema_oid);
+
+		/*
+		 * Copy the name before releasing the cache entry: the tuple must
+		 * not be referenced once ReleaseSysCache() has been called.
+		 */
+		schema_form = (Form_pg_xmlschema) GETSTRUCT(tuple);
+		schema_name = pstrdup(NameStr(schema_form->schemaname));
+		ReleaseSysCache(tuple);
+
+		aclcheck_error(aclresult, OBJECT_XMLSCHEMA, schema_name);
+	}
+
+	tuple = SearchSysCache1(XMLSCHEMAOID, ObjectIdGetDatum(schema_oid));
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "cache lookup failed for XML schema %u", schema_oid);
+
+	schema_datum = SysCacheGetAttr(XMLSCHEMAOID, tuple,
+								   Anum_pg_xmlschema_schemadata, &isnull);
+	if (isnull)
+		elog(ERROR, "null schemadata for XML schema %u", schema_oid);
+
+	/* Convert the schema text to a C string before dropping the tuple */
+	schema_xml = DatumGetXmlP(schema_datum);
+	schemastr = text_to_cstring(schema_xml);
+	ReleaseSysCache(tuple);
+
+	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
+
+	PG_TRY();
+	{
+		/*
+		 * No soft-error context is passed, so xml_parse() raises the error
+		 * itself if the document is malformed.
+		 */
+		doc = xml_parse((text *) data, XMLOPTION_DOCUMENT, true,
+						GetDatabaseEncoding(), NULL, NULL, NULL);
+		if (doc == NULL)
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+						"invalid XML document");
+
+		schema_parser_ctxt = xmlSchemaNewMemParserCtxt(schemastr, strlen(schemastr));
+		if (schema_parser_ctxt == NULL)
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+						"failed to create schema parser context");
+
+		schema_ptr = xmlSchemaParse(schema_parser_ctxt);
+		if (schema_ptr == NULL)
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+						"failed to parse XML schema");
+
+		valid_ctxt = xmlSchemaNewValidCtxt(schema_ptr);
+		if (valid_ctxt == NULL)
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+						"failed to create schema validation context");
+
+		/* Route validation messages into our error context */
+		xmlSchemaSetValidStructuredErrors(valid_ctxt, xml_errorHandler, xmlerrcxt);
+
+		/* 0 = valid, > 0 = validation error, < 0 = internal error */
+		result = xmlSchemaValidateDoc(valid_ctxt, doc);
+		if (result < 0)
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+						"internal error during schema validation");
+		if (result > 0)
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+						"XML validation failed");
+	}
+	PG_CATCH();
+	{
+		if (valid_ctxt)
+			xmlSchemaFreeValidCtxt(valid_ctxt);
+		if (schema_ptr)
+			xmlSchemaFree(schema_ptr);
+		if (schema_parser_ctxt)
+			xmlSchemaFreeParserCtxt(schema_parser_ctxt);
+		if (doc)
+			xmlFreeDoc(doc);
+		pg_xml_done(xmlerrcxt, true);
+		pfree(schemastr);
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+
+	if (valid_ctxt)
+		xmlSchemaFreeValidCtxt(valid_ctxt);
+	if (schema_ptr)
+		xmlSchemaFree(schema_ptr);
+	if (schema_parser_ctxt)
+		xmlSchemaFreeParserCtxt(schema_parser_ctxt);
+	if (doc)
+		xmlFreeDoc(doc);
+
+	pg_xml_done(xmlerrcxt, false);
+	pfree(schemastr);
+	return data;
+#else
+	NO_XML_SUPPORT();
+	return NULL;
+#endif
+}
 
 bool
 xml_is_document(xmltype *arg)
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 5211cadc258..c40cbc8981a 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1610,6 +1610,7 @@ typedef enum XmlExprOp
 	IS_XMLROOT,					/* XMLROOT(xml, version, standalone) */
 	IS_XMLSERIALIZE,			/* XMLSERIALIZE(is_document, xmlval, indent) */
 	IS_DOCUMENT,				/* xmlval IS DOCUMENT */
+	IS_XMLVALIDATE,				/* XMLVALIDATE(xmlval, schema) */
 } XmlExprOp;
 
 typedef enum XmlOptionType
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index 03acb255449..dc6a4d37840 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -76,6 +76,7 @@ extern xmltype *xmlelement(XmlExpr *xexpr,
 extern xmltype *xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace);
 extern xmltype *xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null);
 extern xmltype *xmlroot(xmltype *data, text *version, int standalone);
+extern xmltype *xmlvalidate_schema(xmltype *data, Oid schema_oid);
 extern bool xml_is_document(xmltype *arg);
 extern text *xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg,
 									bool indent);
-- 
2.51.2