As a result of discussion with Peter, I am providing a modified patch for
xpath_array() with namespace support.
The signature is:
_xml xpath_array(text xpathQuery, xml xmlValue[, _text namespacesBindings])
The third argument is a 2-dimensional array defining the namespace
bindings. Simple examples:
xmltest=# SELECT xpath_array('//text()', '<local:data
xmlns:local="http://127.0.0.1"><local:piece id="1">number
one</local:piece><local:piece id="2" /></local:data>');
xpath_array
----------------
{"number one"}
(1 row)
xmltest=# SELECT xpath_array('//loc:piece/@id', '<local:data
xmlns:local="http://127.0.0.1"><local:piece id="1">number
one</local:piece><local:piece id="2" /></local:data>',
ARRAY[ARRAY['loc'], ARRAY['http://127.0.0.1']]);
xpath_array
-------------
{1,2}
(1 row)
Thoughts regarding other XPath functions were posted a couple of days
ago: http://archives.postgresql.org/pgsql-patches/2007-02/msg00373.php
If there are no objections, we could name the function provided in this
patch xpath() or xmlpath() (the latter is similar to the SQL/XML
functions).
Also, maybe someone can suggest a better approach for passing namespace
bindings (more convenient than ARRAY[ARRAY[...], ARRAY[...]])?
--
Best regards,
Nikolay
Index: src/backend/utils/adt/xml.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/xml.c,v
retrieving revision 1.31
diff -u -r1.31 xml.c
--- src/backend/utils/adt/xml.c 16 Feb 2007 18:37:43 -0000 1.31
+++ src/backend/utils/adt/xml.c 20 Feb 2007 23:20:54 -0000
@@ -47,6 +47,8 @@
#include <libxml/uri.h>
#include <libxml/xmlerror.h>
#include <libxml/xmlwriter.h>
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
#endif /* USE_LIBXML */
#include "catalog/namespace.h"
@@ -65,6 +67,7 @@
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
+#include "access/tupmacs.h"
#include "utils/xml.h"
@@ -86,6 +89,7 @@
static int parse_xml_decl(const xmlChar *str, size_t *lenp, xmlChar **version, xmlChar **encoding, int *standalone);
static bool print_xml_decl(StringInfo buf, const xmlChar *version, pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, xmlChar *encoding);
+static text *xml_xmlnodetotext(xmlNodePtr cur);
#endif /* USE_LIBXML */
@@ -1463,7 +1467,6 @@
return buf.data;
}
-
/*
* Map SQL value to XML value; see SQL/XML:2003 section 9.16.
*/
@@ -2334,3 +2337,238 @@
else
appendStringInfoString(result, "</row>\n\n");
}
+
+
+/*
+ * XPath related functions
+ */
+
+/*
+ * Convert an XML node to a text value.
+ *
+ * The result holds the node's string value as computed by
+ * xmlXPathCastNodeToString(); it is not a serialized dump of the node.
+ * The returned text is palloc'd.
+ */
+static text *
+xml_xmlnodetotext(xmlNodePtr cur)
+{
+	xmlChar    *str;
+	text	   *result;
+	size_t		len;
+
+	str = xmlXPathCastNodeToString(cur);
+	if (str == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+
+	len = strlen((char *) str);
+	result = (text *) palloc(len + VARHDRSZ);
+	VARATT_SIZEP(result) = len + VARHDRSZ;
+	memcpy(VARDATA(result), str, len);
+
+	/* the string was allocated by libxml; release it once it is copied */
+	xmlFree(str);
+
+	return result;
+}
+
+/*
+ * Evaluate an XPath expression and return an array of XML values.
+ *
+ * Arguments:
+ *	0: text  - the XPath expression (must not be empty)
+ *	1: xml   - the XML value the expression is evaluated against
+ *	2: _text - optional namespace bindings: a 2-dimensional array whose
+ *			   first subarray holds the prefixes and whose second subarray
+ *			   holds the matching URIs
+ *
+ * Returns an array with one element per node of the resulting node set
+ * (each element is the node's string value), or NULL when the expression
+ * yields no node set or an empty one.
+ *
+ * As we have no support for XQuery sequences yet, this function seems
+ * to be the most useful one (an array of XML values serves as a kind of
+ * substitute for XQuery sequences).
+ *
+ * Workaround here: XML data that does not start with an XML declaration is
+ * wrapped in a dummy root element to allow XPath for fragments (see the
+ * "XPath for fragment" TODO comment inside).
+ */
+Datum
+xpath_array(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	ArrayBuildState *astate = NULL;
+	xmlParserCtxtPtr ctxt = NULL;
+	xmlDocPtr	doc = NULL;
+	xmlXPathContextPtr xpathctx = NULL;
+	xmlXPathCompExprPtr xpathcomp = NULL;
+	xmlXPathObjectPtr xpathobj = NULL;
+	int32		len, xpath_len;
+	xmlChar    *string, *xpath_expr;
+	bool		res_is_null = false;
+	int			i;
+	xmltype    *data = PG_GETARG_XML_P(1);
+	text	   *xpath_expr_text = PG_GETARG_TEXT_P(0);
+	ArrayType  *namespaces;
+	int		   *dims, ndims, ns_count = 0, bitmask = 1;
+	char	   *ptr;
+	bits8	   *bitmap;
+	char	  **ns_names = NULL, **ns_uris = NULL;
+	int16		typlen;
+	bool		typbyval;
+	char		typalign;
+
+	/*
+	 * Namespace mappings passed as text[].
+	 * A 2-dimensional array is expected: the 1st subarray is the array of
+	 * names, the 2nd -- the array of URIs, example:
+	 * ARRAY[ARRAY['myns', 'myns2'], ARRAY['http://example.com', 'http://example2.com']].
+	 *
+	 * The function is also reachable with only two arguments, so the third
+	 * one must not be inspected unless it was actually passed.
+	 */
+	if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
+	{
+		namespaces = PG_GETARG_ARRAYTYPE_P(2);
+		ndims = ARR_NDIM(namespaces);
+		dims = ARR_DIMS(namespaces);
+
+		/* Sanity checks */
+		if (ndims != 2)
+			ereport(ERROR, (errmsg("invalid array passed for namespace mappings"),
+							errdetail("Only 2-dimensional array may be used for namespace mappings.")));
+
+		/* exactly two rows are needed, else names and URIs get mixed up */
+		if (dims[0] != 2)
+			ereport(ERROR, (errmsg("invalid array passed for namespace mappings"),
+							errdetail("The array must consist of exactly two subarrays: namespace names and URIs.")));
+
+		Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
+
+		ns_count = ArrayGetNItems(ndims, dims) / 2;
+		get_typlenbyvalalign(ARR_ELEMTYPE(namespaces),
+							 &typlen, &typbyval, &typalign);
+		ns_names = (char **) palloc(ns_count * sizeof(char *));
+		ns_uris = (char **) palloc(ns_count * sizeof(char *));
+		ptr = ARR_DATA_PTR(namespaces);
+		bitmap = ARR_NULLBITMAP(namespaces);
+		bitmask = 1;
+
+		/* first ns_count elements are the names, the rest are the URIs */
+		for (i = 0; i < ns_count * 2; i++)
+		{
+			if (bitmap && (*bitmap & bitmask) == 0)
+				ereport(ERROR, (errmsg("neither namespace nor URI may be NULL"))); /* TODO: better message */
+			else
+			{
+				if (i < ns_count)
+					ns_names[i] = DatumGetCString(DirectFunctionCall1(textout,
+														  PointerGetDatum(ptr)));
+				else
+					ns_uris[i - ns_count] = DatumGetCString(DirectFunctionCall1(textout,
+														  PointerGetDatum(ptr)));
+				ptr = att_addlength(ptr, typlen, PointerGetDatum(ptr));
+				ptr = (char *) att_align(ptr, typalign);
+			}
+
+			/* advance bitmap pointer if any */
+			if (bitmap)
+			{
+				bitmask <<= 1;
+				if (bitmask == 0x100)
+				{
+					bitmap++;
+					bitmask = 1;
+				}
+			}
+		}
+	}
+
+	len = VARSIZE(data) - VARHDRSZ;
+	xpath_len = VARSIZE(xpath_expr_text) - VARHDRSZ;
+	if (xpath_len == 0)
+		ereport(ERROR, (errmsg("empty XPath expression")));
+
+	/*
+	 * Initialize libxml before the first libxml call below.
+	 * NOTE(review): presumably xml_init() installs the allocator that
+	 * xmlStrncatNew()/xmlStrncat() will use -- confirm against xml_init().
+	 */
+	xml_init();
+
+	/* the length test keeps the comparison inside the (unterminated) value */
+	if (len >= 5 &&
+		xmlStrncmp((xmlChar *) VARDATA(data), (xmlChar *) "<?xml", 5) == 0)
+	{
+		/* full document: just make NUL-terminated copies */
+		string = palloc(len + 1);
+		memcpy(string, VARDATA(data), len);
+		string[len] = '\0';
+		xpath_expr = palloc(xpath_len + 1);
+		memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
+		xpath_expr[xpath_len] = '\0';
+	}
+	else
+	{
+		/* use "<x>...</x>" as dummy root element to enable XPath for fragments */
+		/* TODO: (XPath for fragment) find better solution to work with XML fragment! */
+		string = xmlStrncatNew((xmlChar *) "<x>", (xmlChar *) VARDATA(data), len);
+		string = xmlStrncat(string, (xmlChar *) "</x>", 4);
+		len += 7;				/* strlen("<x>") + strlen("</x>") */
+
+		/*
+		 * The expression is prefixed with "/x" to step over the dummy root.
+		 * Only the expression grows here, so "len" (the size of the XML
+		 * buffer handed to the parser) must stay untouched.
+		 */
+		xpath_expr = xmlStrncatNew((xmlChar *) "/x", (xmlChar *) VARDATA(xpath_expr_text), xpath_len);
+	}
+
+	PG_TRY();
+	{
+		/* redundant XML parsing (two parsings for the same value in the same session are possible) */
+		ctxt = xmlNewParserCtxt();
+		if (ctxt == NULL)
+			xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+						"could not allocate parser context");
+		doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
+		if (doc == NULL)
+			xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+						"could not parse XML data");
+		xpathctx = xmlXPathNewContext(doc);
+		if (xpathctx == NULL)
+			xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+						"could not allocate XPath context");
+		xpathctx->node = xmlDocGetRootElement(doc);
+		if (xpathctx->node == NULL)
+			xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+						"could not find root XML element");
+
+		/* register namespaces, if any */
+		if ((ns_count > 0) && ns_names && ns_uris)
+			for (i = 0; i < ns_count; i++)
+				if (0 != xmlXPathRegisterNs(xpathctx, (xmlChar *) ns_names[i], (xmlChar *) ns_uris[i]))
+					ereport(ERROR,
+							(errmsg("could not register XML namespace with prefix=\"%s\" and href=\"%s\"", ns_names[i], ns_uris[i])));
+
+		xpathcomp = xmlXPathCompile(xpath_expr);
+		if (xpathcomp == NULL)
+			xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+						"invalid XPath expression"); /* TODO: show proper XPath error details */
+
+		xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
+		xmlXPathFreeCompExpr(xpathcomp);
+		/* reset, so that the PG_CATCH block cannot free it a second time */
+		xpathcomp = NULL;
+		if (xpathobj == NULL)
+			ereport(ERROR, (errmsg("could not create XPath object")));
+
+		/*
+		 * No node set at all, or an empty one, yields NULL.
+		 * TODO maybe empty array should be returned for an empty node set,
+		 * not NULL?
+		 */
+		if (xpathobj->nodesetval == NULL ||
+			xpathobj->nodesetval->nodeNr == 0)
+			res_is_null = true;
+		else
+		{
+			for (i = 0; i < xpathobj->nodesetval->nodeNr; i++)
+			{
+				Datum		elem;
+				bool		elemisnull = false;
+
+				elem = PointerGetDatum(xml_xmlnodetotext(xpathobj->nodesetval->nodeTab[i]));
+				astate = accumArrayResult(astate, elem,
+										  elemisnull, XMLOID,
+										  CurrentMemoryContext);
+			}
+		}
+
+		xmlXPathFreeObject(xpathobj);
+		xmlXPathFreeContext(xpathctx);
+		xmlFreeParserCtxt(ctxt);
+		xmlFreeDoc(doc);
+		xmlCleanupParser();
+	}
+	PG_CATCH();
+	{
+		/* release whatever libxml objects had been created before the error */
+		if (xpathcomp)
+			xmlXPathFreeCompExpr(xpathcomp);
+		if (xpathobj)
+			xmlXPathFreeObject(xpathobj);
+		if (xpathctx)
+			xmlXPathFreeContext(xpathctx);
+		if (doc)
+			xmlFreeDoc(doc);
+		if (ctxt)
+			xmlFreeParserCtxt(ctxt);
+		xmlCleanupParser();
+
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+
+	if (res_is_null)
+	{
+		PG_RETURN_NULL();
+	}
+	else
+	{
+		PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
+	}
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif
+}
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.446
diff -u -r1.446 pg_proc.h
--- src/include/catalog/pg_proc.h 20 Feb 2007 10:00:25 -0000 1.446
+++ src/include/catalog/pg_proc.h 20 Feb 2007 23:20:57 -0000
@@ -4071,6 +4071,10 @@
DATA(insert OID = 2930 ( query_to_xml_and_xmlschema PGNSP PGUID 12 100 0 f f t f s 4 142 "25 16 16 25" _null_ _null_ "{query,nulls,tableforest,targetns}" query_to_xml_and_xmlschema - _null_ ));
DESCR("map query result and structure to XML and XML Schema");
+DATA(insert OID = 2931 ( xpath_array PGNSP PGUID 12 1 0 f f t f i 2 143 "25 142" _null_ _null_ _null_ xpath_array - _null_ ));
+DESCR("evaluate XPath expression");
+DATA(insert OID = 2932 ( xpath_array PGNSP PGUID 12 1 0 f f t f i 3 143 "25 142 1009" _null_ _null_ _null_ xpath_array - _null_ ));
+DESCR("evaluate XPath expression, with namespaces support");
/* uuid */
DATA(insert OID = 2952 ( uuid_in PGNSP PGUID 12 1 0 f f t f i 1 2950 "2275" _null_ _null_ _null_ uuid_in - _null_ ));
Index: src/include/utils/xml.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/utils/xml.h,v
retrieving revision 1.16
diff -u -r1.16 xml.h
--- src/include/utils/xml.h 16 Feb 2007 07:46:55 -0000 1.16
+++ src/include/utils/xml.h 20 Feb 2007 23:20:57 -0000
@@ -36,6 +36,7 @@
extern Datum texttoxml(PG_FUNCTION_ARGS);
extern Datum xmltotext(PG_FUNCTION_ARGS);
extern Datum xmlvalidate(PG_FUNCTION_ARGS);
+extern Datum xpath_array(PG_FUNCTION_ARGS);
extern Datum table_to_xml(PG_FUNCTION_ARGS);
extern Datum query_to_xml(PG_FUNCTION_ARGS);
---------------------------(end of broadcast)---------------------------
TIP 4: Have you searched our list archives?
http://archives.postgresql.org