Hi, this proposal is a follow-up to the implementation of XMLTABLE.
Many XML documents declare a default document namespace, for example: <rows xmlns="http://x.y"><row><a>10</a></row></rows> For such XML documents, every search path must reference the namespace "http://x.y". This is not very intuitive, and it makes XMLTABLE less user-friendly, because the default column path (the same as the column name) cannot be used. The solution to this issue is a default namespace, which is defined in SQL/XML. Example, using the XML document above - without a default namespace: XMLTABLE(XMLNAMESPACES('http://x.y' AS aux), '/aux:rows/aux:row' PASSING ... COLUMNS a int PATH 'aux:a') and with a default namespace: XMLTABLE(XMLNAMESPACES(DEFAULT 'http://x.y'), '/rows/row' PASSING ... COLUMNS a int); Unfortunately, libxml2 doesn't support default namespaces in XPath expressions. Because the libxml2 functionality is frozen, there is little chance that support will be added in the near future. An implementation is not too hard, although it requires a simple state-based translator for XPath expressions. Other databases that implement XMLTABLE support a default namespace for XPath expressions. A patch with an initial implementation is attached. Regards Pavel
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 583b3b241a..c2558a33ef 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -10465,8 +10465,7 @@ SELECT xpath_exists('/my:a/text()', '<my:a xmlns:my="http://example.com">test</m <para> The optional <literal>XMLNAMESPACES</> clause is a comma-separated list of namespaces. It specifies the XML namespaces used in - the document and their aliases. A default namespace specification - is not currently supported. + the document and their aliases. </para> <para> diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 0f512753e4..5a3715cc84 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -29,7 +29,7 @@ OBJS = acl.o amutils.o arrayfuncs.o array_expanded.o array_selfuncs.o \ tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \ tsvector.o tsvector_op.o tsvector_parser.o \ txid.o uuid.o varbit.o varchar.o varlena.o version.o \ - windowfuncs.o xid.o xml.o + windowfuncs.o xid.o xml.o xpath_parser.o like.o: like.c like_match.c diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index f81cf489d2..d59a76f0b4 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -91,7 +91,7 @@ #include "utils/rel.h" #include "utils/syscache.h" #include "utils/xml.h" - +#include "utils/xpath_parser.h" /* GUC variables */ int xmlbinary; @@ -184,6 +184,7 @@ typedef struct XmlTableBuilderData xmlXPathCompExprPtr xpathcomp; xmlXPathObjectPtr xpathobj; xmlXPathCompExprPtr *xpathscomp; + bool with_default_ns; } XmlTableBuilderData; #endif @@ -4180,6 +4181,7 @@ XmlTableInitOpaque(TableFuncScanState *state, int natts) xtCxt->magic = XMLTABLE_CONTEXT_MAGIC; xtCxt->natts = natts; xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts); + xtCxt->with_default_ns = false; xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); @@ -4272,6 +4274,8 @@ XmlTableSetDocument(TableFuncScanState *state, Datum value) #endif /* not 
USE_LIBXML */ } +#define DEFAULT_NAMESPACE_NAME "pgdefnamespace" + /* * XmlTableSetNamespace * Add a namespace declaration @@ -4282,12 +4286,14 @@ XmlTableSetNamespace(TableFuncScanState *state, char *name, char *uri) #ifdef USE_LIBXML XmlTableBuilderData *xtCxt; - if (name == NULL) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("DEFAULT namespace is not supported"))); xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace"); + if (name == NULL) + { + xtCxt->with_default_ns = true; + name = DEFAULT_NAMESPACE_NAME; + } + if (xmlXPathRegisterNs(xtCxt->xpathcxt, pg_xmlCharStrndup(name, strlen(name)), pg_xmlCharStrndup(uri, strlen(uri)))) @@ -4316,6 +4322,14 @@ XmlTableSetRowFilter(TableFuncScanState *state, char *path) (errcode(ERRCODE_DATA_EXCEPTION), errmsg("row path filter must not be empty string"))); + if (xtCxt->with_default_ns) + { + StringInfoData str; + + transformXPath(&str, path, DEFAULT_NAMESPACE_NAME); + path = str.data; + } + xstr = pg_xmlCharStrndup(path, strlen(path)); xtCxt->xpathcomp = xmlXPathCompile(xstr); @@ -4347,6 +4361,14 @@ XmlTableSetColumnFilter(TableFuncScanState *state, char *path, int colnum) (errcode(ERRCODE_DATA_EXCEPTION), errmsg("column path filter must not be empty string"))); + if (xtCxt->with_default_ns) + { + StringInfoData str; + + transformXPath(&str, path, DEFAULT_NAMESPACE_NAME); + path = str.data; + } + xstr = pg_xmlCharStrndup(path, strlen(path)); xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr); diff --git a/src/backend/utils/adt/xpath_parser.c b/src/backend/utils/adt/xpath_parser.c new file mode 100644 index 0000000000..7ec2d584c6 --- /dev/null +++ b/src/backend/utils/adt/xpath_parser.c @@ -0,0 +1,323 @@ +/*------------------------------------------------------------------------- + * + * xpath_parser.c + * XML XPath parser. 
+ * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/utils/adt/xpath_parser.c + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "utils/xpath_parser.h" + +/* + * All PostgreSQL XML related functionality is based on libxml2 library, and + * XPath support is not an exception. However, libxml2 doesn't support + * default namespace for XPath expressions. Because there are not any API + * how to transform or access to parsed XPath expression we have to parse + * XPath here. + * + * Those functionalities are implemented with a simple XPath parser/ + * preprocessor. This XPath parser transforms a XPath expression to another + * XPath expression that can be used by libxml2 XPath evaluation. It doesn't + * replace libxml2 XPath parser or libxml2 XPath expression evaluation. + */ + +#ifdef USE_LIBXML + +/* + * We need to work with XPath expression tokens. When expression starting with + * nodename, then we can use prefix. When default namespace is defined, then we + * should to enhance any nodename and attribute without namespace by default + * namespace. + */ + +typedef enum +{ + XPATH_TOKEN_NONE, + XPATH_TOKEN_NAME, + XPATH_TOKEN_STRING, + XPATH_TOKEN_NUMBER, + XPATH_TOKEN_OTHER +} XPathTokenType; + +typedef struct XPathTokenInfo +{ + XPathTokenType ttype; + char *start; + int length; +} XPathTokenInfo; + +#define TOKEN_STACK_SIZE 10 + +typedef struct ParserData +{ + char *str; + char *cur; + XPathTokenInfo stack[TOKEN_STACK_SIZE]; + int stack_length; +} XPathParserData; + +/* Any high-bit-set character is OK (might be part of a multibyte char) */ +#define NODENAME_FIRSTCHAR(c) ((c) == '_' || (c) == '-' || \ + ((c) >= 'A' && (c) <= 'Z') || \ + ((c) >= 'a' && (c) <= 'z') || \ + (IS_HIGHBIT_SET(c))) + +#define IS_NODENAME_CHAR(c) (NODENAME_FIRSTCHAR(c) || (c) == '.' 
|| \ + ((c) >= '0' && (c) <= '9')) + + +/* + * Returns next char after last char of token - XPath lexer + */ +static char * +getXPathToken(char *str, XPathTokenInfo * ti) +{ + /* skip initial spaces */ + while (*str == ' ') + str++; + + if (*str != '\0') + { + char c = *str; + + ti->start = str++; + + if (c >= '0' && c <= '9') + { + while (*str >= '0' && *str <= '9') + str++; + if (*str == '.') + { + str++; + while (*str >= '0' && *str <= '9') + str++; + } + ti->ttype = XPATH_TOKEN_NUMBER; + } + else if (NODENAME_FIRSTCHAR(c)) + { + while (IS_NODENAME_CHAR(*str)) + str++; + + ti->ttype = XPATH_TOKEN_NAME; + } + else if (c == '"') + { + while (*str != '\0') + if (*str++ == '"') + break; + + ti->ttype = XPATH_TOKEN_STRING; + } + else + ti->ttype = XPATH_TOKEN_OTHER; + + ti->length = str - ti->start; + } + else + { + ti->start = NULL; + ti->length = 0; + + ti->ttype = XPATH_TOKEN_NONE; + } + + return str; +} + +/* + * reset XPath parser stack + */ +static void +initXPathParser(XPathParserData * parser, char *str) +{ + parser->str = str; + parser->cur = str; + parser->stack_length = 0; +} + +/* + * Returns token from stack or read token + */ +static void +nextXPathToken(XPathParserData * parser, XPathTokenInfo * ti) +{ + if (parser->stack_length > 0) + memcpy(ti, &parser->stack[--parser->stack_length], + sizeof(XPathTokenInfo)); + else + parser->cur = getXPathToken(parser->cur, ti); +} + +/* + * Push token to stack + */ +static void +pushXPathToken(XPathParserData * parser, XPathTokenInfo * ti) +{ + if (parser->stack_length == TOKEN_STACK_SIZE) + elog(ERROR, "internal error"); + memcpy(&parser->stack[parser->stack_length++], ti, + sizeof(XPathTokenInfo)); +} + +/* + * Write token to output string + */ +static void +writeXPathToken(StringInfo str, XPathTokenInfo * ti) +{ + Assert(ti->ttype != XPATH_TOKEN_NONE); + + if (ti->ttype != XPATH_TOKEN_OTHER) + appendBinaryStringInfo(str, ti->start, ti->length); + else + appendStringInfoChar(str, *ti->start); +} + +/* + * This 
is main part of XPath transformation. It can be called recursivly, + * when XPath expression contains predicates. + */ +static void +_transformXPath(StringInfo str, XPathParserData * parser, + bool inside_predicate, + char *def_namespace_name) +{ + XPathTokenInfo t1, + t2; + bool last_token_is_name = false; + + nextXPathToken(parser, &t1); + + while (t1.ttype != XPATH_TOKEN_NONE) + { + switch (t1.ttype) + { + case XPATH_TOKEN_NUMBER: + case XPATH_TOKEN_STRING: + last_token_is_name = false; + writeXPathToken(str, &t1); + nextXPathToken(parser, &t1); + break; + + case XPATH_TOKEN_NAME: + { + bool is_qual_name = false; + + /* inside predicate ignore keywords "and" "or" */ + if (inside_predicate) + { + if ((strncmp(t1.start, "and", 3) == 0 && t1.length == 3) || + (strncmp(t1.start, "or", 2) == 0 && t1.length == 2)) + { + writeXPathToken(str, &t1); + nextXPathToken(parser, &t1); + break; + } + } + + last_token_is_name = true; + nextXPathToken(parser, &t2); + if (t2.ttype == XPATH_TOKEN_OTHER) + { + if (*t2.start == '(') + last_token_is_name = false; + else if (*t2.start == ':') + is_qual_name = true; + } + + if (last_token_is_name && !is_qual_name && def_namespace_name != NULL) + appendStringInfo(str, "%s:", def_namespace_name); + + writeXPathToken(str, &t1); + + if (is_qual_name) + { + writeXPathToken(str, &t2); + nextXPathToken(parser, &t1); + if (t1.ttype == XPATH_TOKEN_NAME) + writeXPathToken(str, &t1); + else + pushXPathToken(parser, &t1); + } + else + pushXPathToken(parser, &t2); + + nextXPathToken(parser, &t1); + } + break; + + case XPATH_TOKEN_OTHER: + { + char c = *t1.start; + + writeXPathToken(str, &t1); + + if (c == '[') + _transformXPath(str, parser, true, def_namespace_name); + else + { + last_token_is_name = false; + + if (c == ']' && inside_predicate) + return; + + else if (c == '@') + { + nextXPathToken(parser, &t1); + if (t1.ttype == XPATH_TOKEN_NAME) + { + bool is_qual_name = false; + + nextXPathToken(parser, &t2); + if (t2.ttype == XPATH_TOKEN_OTHER 
&& *t2.start == ':') + is_qual_name = true; + + if (!is_qual_name && def_namespace_name != NULL) + appendStringInfo(str, "%s:", def_namespace_name); + + writeXPathToken(str, &t1); + if (is_qual_name) + { + writeXPathToken(str, &t2); + nextXPathToken(parser, &t1); + if (t1.ttype == XPATH_TOKEN_NAME) + writeXPathToken(str, &t1); + else + pushXPathToken(parser, &t1); + } + else + pushXPathToken(parser, &t2); + } + else + pushXPathToken(parser, &t1); + } + } + nextXPathToken(parser, &t1); + } + break; + + case XPATH_TOKEN_NONE: + elog(ERROR, "should not be here"); + } + } +} + +void +transformXPath(StringInfo str, char *xpath, + char *def_namespace_name) +{ + XPathParserData parser; + + initStringInfo(str); + initXPathParser(&parser, xpath); + _transformXPath(str, &parser, false, def_namespace_name); +} + +#endif diff --git a/src/include/utils/xpath_parser.h b/src/include/utils/xpath_parser.h new file mode 100644 index 0000000000..b2fc239e12 --- /dev/null +++ b/src/include/utils/xpath_parser.h @@ -0,0 +1,23 @@ +/*------------------------------------------------------------------------- + * + * xpath_parser.h + * Declarations for XML XPath transformation. 
+ * + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/utils/xml.h + * + *------------------------------------------------------------------------- + */ + +#ifndef XPATH_PARSER_H +#define XPATH_PARSER_H + +#include "postgres.h" +#include "lib/stringinfo.h" + +void transformXPath(StringInfo str, char *xpath, char *def_namespace_name); + +#endif /* XPATH_PARSER_H */ diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out index bcc585d427..9c543edad6 100644 --- a/src/test/regress/expected/xml.out +++ b/src/test/regress/expected/xml.out @@ -1085,7 +1085,11 @@ SELECT * FROM XMLTABLE(XMLNAMESPACES(DEFAULT 'http://x.y'), '/rows/row' PASSING '<rows xmlns="http://x.y"><row><a>10</a></row></rows>' COLUMNS a int PATH 'a'); -ERROR: DEFAULT namespace is not supported + a +---- + 10 +(1 row) + -- used in prepare statements PREPARE pp AS SELECT xmltable.*
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers