[HACKERS] proposal - Default namespaces for XPath expressions (PostgreSQL 11)

Pavel Stehule Sat, 11 Mar 2017 11:46:52 -0800

Hi

This proposal is a follow-up to the implementation of XMLTABLE.


Many XML documents have a default (document) namespace assigned:

<rows xmlns="http://x.y"><row><a>10</a></row></rows>

For such XML documents, every search path must reference the namespace
"http://x.y". This is not very intuitive, and it makes XMLTABLE less user
friendly, because the default column path (the same as the column name)
cannot be used. A solution to this issue is a default namespace, which is
defined in SQL/XML.

Example, using the XML document above:

without default namespace:
XMLTABLE(NAMESPACES('http://x.y' AS aux),
                    '/aux:rows/aux:row' PASSING ...
                    COLUMNS a int PATH 'aux:a')

with default namespace
XMLTABLE(NAMESPACES(DEFAULT 'http://x.y'),
                    '/rows/row' PASSING ...
                    COLUMNS a int);


Unfortunately, libxml2 doesn't support default namespaces in XPath
expressions. Because libxml2's functionality is frozen, there is little
chance that support will be added in the near future. An implementation is
not too hard, although it requires a simple state-based translator for
XPath expressions.

Other databases that implement XMLTABLE support a default namespace for
XPath expressions.

A patch with an initial implementation is attached.

Regards

Pavel

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 583b3b241a..c2558a33ef 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -10465,8 +10465,7 @@ SELECT xpath_exists('/my:a/text()', '<my:a xmlns:my="http://example.com";>test</m
     <para>
      The optional <literal>XMLNAMESPACES</> clause is a comma-separated
      list of namespaces.  It specifies the XML namespaces used in
-     the document and their aliases. A default namespace specification
-     is not currently supported.
+     the document and their aliases.
     </para>
 
     <para>
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index 0f512753e4..5a3715cc84 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -29,7 +29,7 @@ OBJS = acl.o amutils.o arrayfuncs.o array_expanded.o array_selfuncs.o \
 	tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \
 	tsvector.o tsvector_op.o tsvector_parser.o \
 	txid.o uuid.o varbit.o varchar.o varlena.o version.o \
-	windowfuncs.o xid.o xml.o
+	windowfuncs.o xid.o xml.o xpath_parser.o
 
 like.o: like.c like_match.c
 
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index f81cf489d2..d59a76f0b4 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -91,7 +91,7 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/xml.h"
-
+#include "utils/xpath_parser.h"
 
 /* GUC variables */
 int			xmlbinary;
@@ -184,6 +184,7 @@ typedef struct XmlTableBuilderData
 	xmlXPathCompExprPtr xpathcomp;
 	xmlXPathObjectPtr xpathobj;
 	xmlXPathCompExprPtr *xpathscomp;
+	bool		with_default_ns;
 } XmlTableBuilderData;
 #endif
 
@@ -4180,6 +4181,7 @@ XmlTableInitOpaque(TableFuncScanState *state, int natts)
 	xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
 	xtCxt->natts = natts;
 	xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
+	xtCxt->with_default_ns = false;
 
 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
@@ -4272,6 +4274,8 @@ XmlTableSetDocument(TableFuncScanState *state, Datum value)
 #endif   /* not USE_LIBXML */
 }
 
+#define DEFAULT_NAMESPACE_NAME		"pgdefnamespace"
+
 /*
  * XmlTableSetNamespace
  *		Add a namespace declaration
@@ -4282,12 +4286,14 @@ XmlTableSetNamespace(TableFuncScanState *state, char *name, char *uri)
 #ifdef USE_LIBXML
 	XmlTableBuilderData *xtCxt;
 
-	if (name == NULL)
-		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("DEFAULT namespace is not supported")));
 	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
 
+	if (name == NULL)
+	{
+		xtCxt->with_default_ns = true;
+		name = DEFAULT_NAMESPACE_NAME;
+	}
+
 	if (xmlXPathRegisterNs(xtCxt->xpathcxt,
 						   pg_xmlCharStrndup(name, strlen(name)),
 						   pg_xmlCharStrndup(uri, strlen(uri))))
@@ -4316,6 +4322,14 @@ XmlTableSetRowFilter(TableFuncScanState *state, char *path)
 				(errcode(ERRCODE_DATA_EXCEPTION),
 				 errmsg("row path filter must not be empty string")));
 
+	if (xtCxt->with_default_ns)
+	{
+		StringInfoData		str;
+
+		transformXPath(&str, path, DEFAULT_NAMESPACE_NAME);
+		path = str.data;
+	}
+
 	xstr = pg_xmlCharStrndup(path, strlen(path));
 
 	xtCxt->xpathcomp = xmlXPathCompile(xstr);
@@ -4347,6 +4361,14 @@ XmlTableSetColumnFilter(TableFuncScanState *state, char *path, int colnum)
 				(errcode(ERRCODE_DATA_EXCEPTION),
 				 errmsg("column path filter must not be empty string")));
 
+	if (xtCxt->with_default_ns)
+	{
+		StringInfoData		str;
+
+		transformXPath(&str, path, DEFAULT_NAMESPACE_NAME);
+		path = str.data;
+	}
+
 	xstr = pg_xmlCharStrndup(path, strlen(path));
 
 	xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
diff --git a/src/backend/utils/adt/xpath_parser.c b/src/backend/utils/adt/xpath_parser.c
new file mode 100644
index 0000000000..7ec2d584c6
--- /dev/null
+++ b/src/backend/utils/adt/xpath_parser.c
@@ -0,0 +1,323 @@
+/*-------------------------------------------------------------------------
+ *
+ * xpath_parser.c
+ *	  XML XPath parser.
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/utils/adt/xpath_parser.c
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "utils/xpath_parser.h"
+
+/*
+ * All XML-related functionality in PostgreSQL is based on the libxml2
+ * library, and XPath support is no exception.  However, libxml2 does not
+ * support a default namespace for XPath expressions, and it offers no API
+ * to transform or inspect a parsed XPath expression, so the expression has
+ * to be parsed here.
+ *
+ * This is done by a simple XPath parser/preprocessor.  It transforms an
+ * XPath expression into another XPath expression that libxml2 can evaluate;
+ * it does not replace the libxml2 XPath parser or the libxml2 XPath
+ * expression evaluation.
+ */
+
+#ifdef USE_LIBXML
+
+/*
+ * The rewrite works on XPath tokens.  Names may carry a namespace prefix;
+ * with a default namespace defined, element names written without a prefix
+ * are qualified with its alias.  NOTE(review): XML default namespaces do not
+ * apply to attribute names - confirm the '@' handling in _transformXPath.
+ */
+
+typedef enum
+{
+	XPATH_TOKEN_NONE,			/* end of input, no token */
+	XPATH_TOKEN_NAME,			/* name-like run of characters */
+	XPATH_TOKEN_STRING,			/* quoted string literal */
+	XPATH_TOKEN_NUMBER,			/* digits with optional fraction */
+	XPATH_TOKEN_OTHER			/* any other single character */
+}	XPathTokenType;
+
+typedef struct XPathTokenInfo	/* one token found by getXPathToken() */
+{
+	XPathTokenType ttype;		/* token class */
+	char	   *start;			/* first byte of the token, NULL if none */
+	int			length;			/* token length in bytes, 0 if none */
+}	XPathTokenInfo;
+
+#define TOKEN_STACK_SIZE		10	/* capacity of the push-back stack */
+/* Parser state: scan position plus a LIFO of pushed-back tokens. */
+typedef struct ParserData
+{
+	char	   *str;			/* expression being parsed */
+	char	   *cur;			/* next unread position in str */
+	XPathTokenInfo stack[TOKEN_STACK_SIZE];	/* pushed-back tokens */
+	int			stack_length;	/* number of tokens on the stack */
+}	XPathParserData;
+
+/* Name start; any high-bit-set char is OK (may be part of a multibyte char) */
+#define NODENAME_FIRSTCHAR(c)	 ((c) == '_' || \
+								 ((c) >= 'A' && (c) <= 'Z') || \
+								 ((c) >= 'a' && (c) <= 'z') || \
+								 (IS_HIGHBIT_SET(c)))
+/* '-' may continue a name but not start one ("-5" and "a -b" are not names) */
+#define IS_NODENAME_CHAR(c)		(NODENAME_FIRSTCHAR(c) || (c) == '.' || \
+								 (c) == '-' || ((c) >= '0' && (c) <= '9'))
+
+
+/*
+ * XPath lexer: store the token found at str in *ti and return the position
+ * after it; at end of input the token is XPATH_TOKEN_NONE with start NULL.
+ * Whitespace is returned as a one-character XPATH_TOKEN_OTHER so that the
+ * output keeps its separators ("a and b" must not become "aandb"); only
+ * whitespace before '(' or ':' or at end of input is skipped, which keeps
+ * the callers' one-token look-ahead working.  Literals use '"' or '\''.
+ */
+static char *
+getXPathToken(char *str, XPathTokenInfo * ti)
+{
+	char	   *next = str;
+	char		c;
+
+	while (*next == ' ' || *next == '\t' || *next == '\n' || *next == '\r')
+		next++;
+	if (*next == '(' || *next == ':' || *next == '\0')
+		str = next;				/* droppable whitespace run */
+	if (*str == '\0')
+	{
+		ti->start = NULL;
+		ti->length = 0;
+		ti->ttype = XPATH_TOKEN_NONE;
+		return str;
+	}
+	c = *str;
+	ti->start = str++;
+	if (c >= '0' && c <= '9')
+	{
+		while (*str >= '0' && *str <= '9')
+			str++;
+		if (*str == '.')
+		{
+			str++;
+			while (*str >= '0' && *str <= '9')
+				str++;
+		}
+		ti->ttype = XPATH_TOKEN_NUMBER;
+	}
+	else if (NODENAME_FIRSTCHAR(c))
+	{
+		while (IS_NODENAME_CHAR(*str))
+			str++;
+		ti->ttype = XPATH_TOKEN_NAME;
+	}
+	else if (c == '"' || c == '\'')
+	{
+		/* the literal ends at the matching quote or at end of input */
+		while (*str != '\0')
+			if (*str++ == c)
+				break;
+		ti->ttype = XPATH_TOKEN_STRING;
+	}
+	else
+		ti->ttype = XPATH_TOKEN_OTHER;	/* one character, maybe whitespace */
+	ti->length = str - ti->start;
+	return str;
+}
+
+/*
+ * Prepare parser for a fresh scan of str: the push-back stack is emptied
+ * and the cursor is placed on the first character.
+ */
+static void
+initXPathParser(XPathParserData * parser, char *str)
+{
+	parser->stack_length = 0;
+	parser->cur = parser->str = str;
+}
+
+/*
+ * Fetch the next token into *ti: the most recently pushed-back token if the
+ * stack holds one, otherwise a fresh token lexed at the cursor.
+ */
+static void
+nextXPathToken(XPathParserData * parser, XPathTokenInfo * ti)
+{
+	if (parser->stack_length <= 0)
+		parser->cur = getXPathToken(parser->cur, ti);
+	else
+		*ti = parser->stack[--parser->stack_length];
+}
+
+/*
+ * Push *ti back so that the next nextXPathToken() call returns it again;
+ * reports an error when all TOKEN_STACK_SIZE slots are already in use.
+ */
+static void
+pushXPathToken(XPathParserData * parser, XPathTokenInfo * ti)
+{
+	if (parser->stack_length == TOKEN_STACK_SIZE)
+		elog(ERROR, "internal error");
+	parser->stack[parser->stack_length++] = *ti;
+}
+
+/*
+ * Append the text of token *ti to str.  XPATH_TOKEN_OTHER contributes its
+ * single character, every other token its full start/length byte range.
+ */
+static void
+writeXPathToken(StringInfo str, XPathTokenInfo * ti)
+{
+	Assert(ti->ttype != XPATH_TOKEN_NONE);
+	if (ti->ttype == XPATH_TOKEN_OTHER)
+		appendStringInfoChar(str, *ti->start);
+	else
+		appendBinaryStringInfo(str, ti->start, ti->length);
+}
+
+/*
+ * Core of the XPath rewrite: copy the tokens supplied by parser to str and
+ * put "def_namespace_name:" in front of every unprefixed element name.
+ *
+ * A name is copied as written when it is followed by '(' (function call or
+ * node test), when it is already qualified ("prefix:local"), when it is an
+ * attribute name after '@' (a default namespace never applies to attributes)
+ * or when it is the operator "and" / "or" inside a predicate.
+ *
+ * inside_predicate is true while the text between '[' and ']' is processed;
+ * the function calls itself for each '[' and that call returns right after
+ * copying the matching ']'.  A NULL def_namespace_name disables prefixing.
+ */
+static void
+_transformXPath(StringInfo str, XPathParserData * parser,
+				bool inside_predicate,
+				char *def_namespace_name)
+{
+	XPathTokenInfo t1, t2;
+	bool		last_token_is_name = false;
+
+	nextXPathToken(parser, &t1);
+	while (t1.ttype != XPATH_TOKEN_NONE)
+	{
+		switch (t1.ttype)
+		{
+			case XPATH_TOKEN_NUMBER:
+			case XPATH_TOKEN_STRING:
+				last_token_is_name = false;
+				writeXPathToken(str, &t1);
+				nextXPathToken(parser, &t1);
+				break;
+
+			case XPATH_TOKEN_NAME:
+				{
+					bool		is_qual_name = false;
+
+					/* inside a predicate "and" / "or" are operators */
+					if (inside_predicate &&
+						((t1.length == 3 && strncmp(t1.start, "and", 3) == 0) ||
+						 (t1.length == 2 && strncmp(t1.start, "or", 2) == 0)))
+					{
+						writeXPathToken(str, &t1);
+						nextXPathToken(parser, &t1);
+						break;
+					}
+
+					/* look ahead one token to classify the name */
+					last_token_is_name = true;
+					nextXPathToken(parser, &t2);
+					if (t2.ttype == XPATH_TOKEN_OTHER)
+					{
+						if (*t2.start == '(')
+							last_token_is_name = false;	/* call or node test */
+						else if (*t2.start == ':')
+							is_qual_name = true;
+					}
+					if (last_token_is_name && !is_qual_name && def_namespace_name != NULL)
+						appendStringInfo(str, "%s:", def_namespace_name);
+					writeXPathToken(str, &t1);
+					if (is_qual_name)
+					{
+						writeXPathToken(str, &t2);
+						nextXPathToken(parser, &t1);
+						if (t1.ttype == XPATH_TOKEN_NAME)
+							writeXPathToken(str, &t1);
+						else
+							pushXPathToken(parser, &t1);
+					}
+					else
+						pushXPathToken(parser, &t2);	/* re-read it later */
+					nextXPathToken(parser, &t1);
+				}
+				break;
+
+			case XPATH_TOKEN_OTHER:
+				{
+					char		c = *t1.start;
+
+					writeXPathToken(str, &t1);
+					if (c == '[')
+						_transformXPath(str, parser, true, def_namespace_name);
+					else
+					{
+						last_token_is_name = false;
+						if (c == ']' && inside_predicate)
+							return;
+						else if (c == '@')
+						{
+							nextXPathToken(parser, &t1);
+							if (t1.ttype == XPATH_TOKEN_NAME)
+							{
+								bool		is_qual_name = false;
+
+								nextXPathToken(parser, &t2);
+								if (t2.ttype == XPATH_TOKEN_OTHER && *t2.start == ':')
+									is_qual_name = true;
+
+								/*
+								 * An unprefixed attribute is in no namespace,
+								 * so its name is copied without the alias.
+								 */
+								writeXPathToken(str, &t1);
+								if (is_qual_name)
+								{
+									writeXPathToken(str, &t2);
+									nextXPathToken(parser, &t1);
+									if (t1.ttype == XPATH_TOKEN_NAME)
+										writeXPathToken(str, &t1);
+									else
+										pushXPathToken(parser, &t1);
+								}
+								else
+									pushXPathToken(parser, &t2);
+							}
+							else
+								pushXPathToken(parser, &t1);
+						}
+					}
+					nextXPathToken(parser, &t1);
+				}
+				break;
+
+			case XPATH_TOKEN_NONE:
+				elog(ERROR, "should not be here");
+		}
+	}
+}
+
+/* Entry point: set up str and fill it with the rewritten form of xpath. */
+void
+transformXPath(StringInfo str, char *xpath, char *def_namespace_name)
+{
+	XPathParserData parser;
+
+	initXPathParser(&parser, xpath);
+	initStringInfo(str);
+	_transformXPath(str, &parser, false, def_namespace_name);
+}
+
+#endif
diff --git a/src/include/utils/xpath_parser.h b/src/include/utils/xpath_parser.h
new file mode 100644
index 0000000000..b2fc239e12
--- /dev/null
+++ b/src/include/utils/xpath_parser.h
@@ -0,0 +1,23 @@
+/*-------------------------------------------------------------------------
+ *
+ * xpath_parser.h
+ *	  Declarations for XML XPath transformation.
+ *
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/xpath_parser.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef XPATH_PARSER_H
+#define XPATH_PARSER_H
+
+#include "postgres.h"
+#include "lib/stringinfo.h"
+/* Initializes str and fills it with xpath rewritten for def_namespace_name. */
+void transformXPath(StringInfo str, char *xpath, char *def_namespace_name);
+
+#endif   /* XPATH_PARSER_H */
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index bcc585d427..9c543edad6 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -1085,7 +1085,11 @@ SELECT * FROM XMLTABLE(XMLNAMESPACES(DEFAULT 'http://x.y'),
                       '/rows/row'
                       PASSING '<rows xmlns="http://x.y";><row><a>10</a></row></rows>'
                       COLUMNS a int PATH 'a');
-ERROR:  DEFAULT namespace is not supported
+ a  
+----
+ 10
+(1 row)
+
 -- used in prepare statements
 PREPARE pp AS
 SELECT  xmltable.*

-- 
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

[HACKERS] proposal - Default namespaces for XPath expressions (PostgreSQL 11)

Reply via email to