Robert Haas wrote:
Please email your patch to the list (replying to this email is fine)
and add it here:
https://commitfest.postgresql.org/action/commitfest_view/open
Here's my patch, developed against HEAD, that adds the function 'xpath_exists'. The function is a lot simpler than originally thought, so none of the string manipulation previously discussed was required. I've also included some regression tests that test the function with and without xml namespaces. I should note that before I added my tests all existing tests passed.

One observation that can be made is that I've largely copied the existing xpath function and altered it to use a different method from the libxml API. I've done it to save me redoing all the namespace handling, however it's apparent to me that if we wanted to expose more of the libxml api we will quickly start having a lot of duplicate code. I notice that refactoring existing code whilst adding new code is generally frowned upon, so once this patch is accepted I will look to refactor the xpath and xpath_exists function. I could even add an xpath_count method at the same time ;) .

Thanks in advance for any and all feedback,

--
Mike Fowler
Registered Linux user: 379787

"I could be a genius if I just put my mind to it, and I,
I could do anything, if only I could get 'round to it"
-PULP 'Glory Days'

Index: src/backend/utils/adt/xml.c
===================================================================
RCS file: /home/mfowler/cvsrepo/pgrepo/pgsql/src/backend/utils/adt/xml.c,v
retrieving revision 1.97
diff -c -r1.97 xml.c
*** src/backend/utils/adt/xml.c	3 Mar 2010 17:29:45 -0000	1.97
--- src/backend/utils/adt/xml.c	11 May 2010 07:54:53 -0000
***************
*** 3495,3497 ****
--- 3495,3670 ----
  	return 0;
  #endif
  }
+ 
+ /*
+  * Determines if the node specified by the supplied XPath exists
+  * in a given XML document, returning a boolean.
+  *
+  * It is up to the user to ensure that the XML passed is in fact
+  * an XML document - XPath doesn't work easily on fragments without
+  * a context node being known.
+  */
+ Datum
+ xpath_exists(PG_FUNCTION_ARGS)
+ {
+ #ifdef USE_LIBXML
+ 	text	   *xpath_expr_text = PG_GETARG_TEXT_P(0);
+ 	xmltype    *data = PG_GETARG_XML_P(1);
+ 	ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
+ 	ArrayBuildState *astate = NULL;
+ 	xmlParserCtxtPtr ctxt = NULL;
+ 	xmlDocPtr	doc = NULL;
+ 	xmlXPathContextPtr xpathctx = NULL;
+ 	xmlXPathCompExprPtr xpathcomp = NULL;
+ 	char	   *datastr;
+ 	int32		len;
+ 	int32		xpath_len;
+ 	xmlChar    *string;
+ 	xmlChar    *xpath_expr;
+ 	int			i;
+ 	int			res_nitems;
+ 	int			ndim;
+ 	Datum	   *ns_names_uris;
+ 	bool	   *ns_names_uris_nulls;
+ 	int			ns_count;
+ 	int			result;
+ 
+ 	/*
+ 	 * Namespace mappings are passed as text[].  If an empty array is passed
+ 	 * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
+ 	 * Else, a 2-dimensional array with length of the second axis being equal
+ 	 * to 2 should be passed, i.e., every subarray contains 2 elements, the
+ 	 * first element defining the name, the second one the URI.  Example:
+ 	 * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
+ 	 * 'http://example2.com']].
+ 	 */
+ 	ndim = ARR_NDIM(namespaces);
+ 	if (ndim != 0)
+ 	{
+ 		int		   *dims;
+ 
+ 		dims = ARR_DIMS(namespaces);
+ 
+ 		if (ndim != 2 || dims[1] != 2)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_DATA_EXCEPTION),
+ 					 errmsg("invalid array for XML namespace mapping"),
+ 					 errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
+ 
+ 		Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
+ 
+ 		deconstruct_array(namespaces, TEXTOID, -1, false, 'i',
+ 						  &ns_names_uris, &ns_names_uris_nulls,
+ 						  &ns_count);
+ 
+ 		Assert((ns_count % 2) == 0);	/* checked above */
+ 		ns_count /= 2;			/* count pairs only */
+ 	}
+ 	else
+ 	{
+ 		ns_names_uris = NULL;
+ 		ns_names_uris_nulls = NULL;
+ 		ns_count = 0;
+ 	}
+ 
+ 	datastr = VARDATA(data);
+ 	len = VARSIZE(data) - VARHDRSZ;
+ 	xpath_len = VARSIZE(xpath_expr_text) - VARHDRSZ;
+ 	if (xpath_len == 0)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_DATA_EXCEPTION),
+ 				 errmsg("empty XPath expression")));
+ 
+ 	string = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
+ 	memcpy(string, datastr, len);
+ 	string[len] = '\0';
+ 
+ 	xpath_expr = (xmlChar *) palloc((xpath_len + 1) * sizeof(xmlChar));
+ 	memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
+ 	xpath_expr[xpath_len] = '\0';
+ 
+ 	pg_xml_init();
+ 	xmlInitParser();
+ 
+ 	PG_TRY();
+ 	{
+ 		/*
+ 		 * redundant XML parsing (two parsings for the same value during one
+ 		 * command execution are possible)
+ 		 */
+ 		ctxt = xmlNewParserCtxt();
+ 		if (ctxt == NULL)
+ 			xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
+ 						"could not allocate parser context");
+ 		doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
+ 		if (doc == NULL)
+ 			xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+ 						"could not parse XML document");
+ 		xpathctx = xmlXPathNewContext(doc);
+ 		if (xpathctx == NULL)
+ 			xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
+ 						"could not allocate XPath context");
+ 		xpathctx->node = xmlDocGetRootElement(doc);
+ 		if (xpathctx->node == NULL)
+ 			xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+ 						"could not find root XML element");
+ 
+ 		/* register namespaces, if any */
+ 		if (ns_count > 0)
+ 		{
+ 			for (i = 0; i < ns_count; i++)
+ 			{
+ 				char	   *ns_name;
+ 				char	   *ns_uri;
+ 
+ 				if (ns_names_uris_nulls[i * 2] ||
+ 					ns_names_uris_nulls[i * 2 + 1])
+ 					ereport(ERROR,
+ 							(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ 					  errmsg("neither namespace name nor URI may be null")));
+ 				ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
+ 				ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
+ 				if (xmlXPathRegisterNs(xpathctx,
+ 									   (xmlChar *) ns_name,
+ 									   (xmlChar *) ns_uri) != 0)
+ 					ereport(ERROR,		/* is this an internal error??? */
+ 							(errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
+ 									ns_name, ns_uri)));
+ 			}
+ 		}
+ 
+ 		xpathcomp = xmlXPathCompile(xpath_expr);
+ 		if (xpathcomp == NULL)	/* TODO: show proper XPath error details */
+ 			xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+ 						"invalid XPath expression");
+ 
+ 		result = xmlXPathCompiledEvalToBoolean(xpathcomp, xpathctx);
+ 		if (result == -1)	/* TODO: reason? */
+ 			xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+ 						"could not create XPath object");
+ 	}
+ 	PG_CATCH();
+ 	{
+ 		if (xpathcomp)
+ 			xmlXPathFreeCompExpr(xpathcomp);
+ 		if (xpathctx)
+ 			xmlXPathFreeContext(xpathctx);
+ 		if (doc)
+ 			xmlFreeDoc(doc);
+ 		if (ctxt)
+ 			xmlFreeParserCtxt(ctxt);
+ 		PG_RE_THROW();
+ 	}
+ 	PG_END_TRY();
+ 
+ 	xmlXPathFreeCompExpr(xpathcomp);
+ 	xmlXPathFreeContext(xpathctx);
+ 	xmlFreeDoc(doc);
+ 	xmlFreeParserCtxt(ctxt);
+ 
+ 	PG_RETURN_BOOL(result);
+ #else
+ 	NO_XML_SUPPORT();
+ 	return 0;
+ #endif
+ }
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /home/mfowler/cvsrepo/pgrepo/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.570
diff -c -r1.570 pg_proc.h
*** src/include/catalog/pg_proc.h	26 Feb 2010 02:01:21 -0000	1.570
--- src/include/catalog/pg_proc.h	11 May 2010 07:46:09 -0000
***************
*** 4385,4390 ****
--- 4385,4395 ----
  DATA(insert OID = 2932 (  xpath		 PGNSP PGUID 14 1 0 0 f f f t f i 2 0 143 "25 142" _null_ _null_ _null_ _null_ "select pg_catalog.xpath($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ ));
  DESCR("evaluate XPath expression");
  
+ DATA(insert OID = 3037 (  xpath_exists	 PGNSP PGUID 12 1 0 0 f f f t f i 3 0 16 "25 142 1009" _null_ _null_ _null_ _null_ xpath_exists _null_ _null_ _null_ ));
+ DESCR("evaluate XPath expression in a boolean context, with namespaces support");
+ DATA(insert OID = 3038 (  xpath_exists	 PGNSP PGUID 14 1 0 0 f f f t f i 2 0 16 "25 142" _null_ _null_ _null_ _null_ "select pg_catalog.xpath_exists($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ ));
+ DESCR("evaluate XPath expression in a boolean context");
+ 
  /* uuid */
  DATA(insert OID = 2952 (  uuid_in		   PGNSP PGUID 12 1 0 0 f f f t f i 1 0 2950 "2275" _null_ _null_ _null_ _null_ uuid_in _null_ _null_ _null_ ));
  DESCR("I/O");
Index: src/include/utils/xml.h
===================================================================
RCS file: /home/mfowler/cvsrepo/pgrepo/pgsql/src/include/utils/xml.h,v
retrieving revision 1.31
diff -c -r1.31 xml.h
*** src/include/utils/xml.h	3 Mar 2010 17:29:45 -0000	1.31
--- src/include/utils/xml.h	10 May 2010 21:28:48 -0000
***************
*** 37,42 ****
--- 37,43 ----
  extern Datum xmltotext(PG_FUNCTION_ARGS);
  extern Datum xmlvalidate(PG_FUNCTION_ARGS);
  extern Datum xpath(PG_FUNCTION_ARGS);
+ extern Datum xpath_exists(PG_FUNCTION_ARGS);
  
  extern Datum table_to_xml(PG_FUNCTION_ARGS);
  extern Datum query_to_xml(PG_FUNCTION_ARGS);
Index: src/test/regress/expected/xml.out
===================================================================
RCS file: /home/mfowler/cvsrepo/pgrepo/pgsql/src/test/regress/expected/xml.out,v
retrieving revision 1.25
diff -c -r1.25 xml.out
*** src/test/regress/expected/xml.out	9 Jun 2009 22:00:57 -0000	1.25
--- src/test/regress/expected/xml.out	11 May 2010 09:04:20 -0000
***************
*** 502,504 ****
--- 502,545 ----
   {<b>two</b>,<b>etc</b>}
  (1 row)
  
+ -- Text xpath_exists evalutation
+ INSERT INTO xmltest VALUES (4, '<menu><beers><name>Budvar</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml);
+ INSERT INTO xmltest VALUES (5, '<menu><beers><name>Molson</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml);
+ INSERT INTO xmltest VALUES (6, '<myns:menu xmlns:myns="http://myns.com";><myns:beers><myns:name>Budvar</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml);
+ INSERT INTO xmltest VALUES (7, '<myns:menu xmlns:myns="http://myns.com";><myns:beers><myns:name>Molson</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml);
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beer',data);
+  count 
+ -------
+      0
+ (1 row)
+ 
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beers',data);
+  count 
+ -------
+      2
+ (1 row)
+ 
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beers/name[text() = ''Molson'']',data);
+  count 
+ -------
+      1
+ (1 row)
+ 
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beer',data,ARRAY[ARRAY['myns','http://myns.com']]);
+  count 
+ -------
+      0
+ (1 row)
+ 
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers',data,ARRAY[ARRAY['myns','http://myns.com']]);
+  count 
+ -------
+      2
+ (1 row)
+ 
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers/myns:name[text() = ''Molson'']',data,ARRAY[ARRAY['myns','http://myns.com']]);
+  count 
+ -------
+      1
+ (1 row)
+ 
Index: src/test/regress/sql/xml.sql
===================================================================
RCS file: /home/mfowler/cvsrepo/pgrepo/pgsql/src/test/regress/sql/xml.sql,v
retrieving revision 1.19
diff -c -r1.19 xml.sql
*** src/test/regress/sql/xml.sql	9 Jun 2009 22:00:57 -0000	1.19
--- src/test/regress/sql/xml.sql	11 May 2010 08:18:52 -0000
***************
*** 163,165 ****
--- 163,178 ----
  SELECT xpath('//text()', '<local:data xmlns:local="http://127.0.0.1";><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
  SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1";><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
  SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
+ 
+ -- Text xpath_exists evalutation
+ INSERT INTO xmltest VALUES (4, '<menu><beers><name>Budvar</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml);
+ INSERT INTO xmltest VALUES (5, '<menu><beers><name>Molson</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml);
+ INSERT INTO xmltest VALUES (6, '<myns:menu xmlns:myns="http://myns.com";><myns:beers><myns:name>Budvar</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml);
+ INSERT INTO xmltest VALUES (7, '<myns:menu xmlns:myns="http://myns.com";><myns:beers><myns:name>Molson</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml);
+ 
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beer',data);
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beers',data);
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beers/name[text() = ''Molson'']',data);
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beer',data,ARRAY[ARRAY['myns','http://myns.com']]);
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers',data,ARRAY[ARRAY['myns','http://myns.com']]);
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers/myns:name[text() = ''Molson'']',data,ARRAY[ARRAY['myns','http://myns.com']]);
\ No newline at end of file
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to