Andrew Dunstan wrote:


Tom Lane wrote:
Hmm, does this proposal require adding a test of well-formed-ness to
a code path that doesn't currently have one?  If so, is that likely
to contribute any noticeable slowdown?

I can't offhand see an objection to this other than possible performance
impact.


Yeah, testing the well-formedness might cost a bit. We could short-circuit the test by applying some comparatively fast heuristic tests.

Or we could decide that we'll just fix the xpath prefix part for 8.3 and keep the wrapping. I don't want to spend a huge effort on fixing something I regard as fundamentally broken.

I'll do some tests to see what the cost of extra xml parsing might be.


The extra cost appears to be fairly negligible.

regression=# create table xpathtest3 as select xmlconcat(xmlelement(name unique1, unique1), '\n\t',xmlelement(name unique2, unique2), '\n\t',xmlelement(name two, two), '\n\t',xmlelement(name four, four),'\n\t',xmlelement(name ten,ten),'\n\t',xmlelement(name twenty,twenty),'\n\t',xmlelement(name hundred,hundred),'\n\t',xmlelement(name thousand,thousand),'\n\t',xmlelement(name twothusand,twothousand),'\n\t',xmlelement(name fivethous,fivethous),'\n\t',xmlelement(name tenthous,tenthous),'\n\t',xmlelement(name odd,odd),'\n\t',xmlelement(name even,even),'\n\t',xmlelement(name stringu1,stringu1),'\n\t',xmlelement(name stringu2,stringu2),'\n\t',xmlelement(name string4,string4),'\n') from tenk1;

regression=# select count(*) from (select xpath('//two[text()="0"]/text()',xmlconcat) as elems from xpathtest3, generate_series(1,10) ) x ; count --------
100000
(1 row)

Time: 27.722 ms


Proposed patch for 8.3 attached. (Note: it only reparses in the non-document case)

cheers

andrew


Index: src/backend/utils/adt/xml.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/xml.c,v
retrieving revision 1.68.2.6
diff -c -r1.68.2.6 xml.c
*** src/backend/utils/adt/xml.c	10 Nov 2008 18:02:27 -0000	1.68.2.6
--- src/backend/utils/adt/xml.c	27 Feb 2009 20:59:28 -0000
***************
*** 3320,3360 ****
  
  	xml_init();
  
! 	/*
! 	 * To handle both documents and fragments, regardless of the fact whether
! 	 * the XML datum has a single root (XML well-formedness), we wrap the XML
! 	 * datum in a dummy element (<x>...</x>) and extend the XPath expression
! 	 * accordingly.  To do it, throw away the XML prolog, if any.
! 	 */
! 	if (len >= 5 &&
! 		xmlStrncmp((xmlChar *) datastr, (xmlChar *) "<?xml", 5) == 0)
! 	{
! 		i = 5;
! 		while (i < len &&
! 			   !(datastr[i - 1] == '?' && datastr[i] == '>'))
! 			i++;
! 
! 		if (i == len)
! 			xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
! 						"could not parse XML data");
  
! 		++i;
  
! 		datastr += i;
! 		len -= i;
! 	}
! 
! 	string = (xmlChar *) palloc((len + 8) * sizeof(xmlChar));
! 	memcpy(string, "<x>", 3);
! 	memcpy(string + 3, datastr, len);
! 	memcpy(string + 3 + len, "</x>", 5);
! 	len += 7;
! 
! 	xpath_expr = (xmlChar *) palloc((xpath_len + 3) * sizeof(xmlChar));
! 	memcpy(xpath_expr, "/x", 2);
! 	memcpy(xpath_expr + 2, VARDATA(xpath_expr_text), xpath_len);
! 	xpath_expr[xpath_len + 2] = '\0';
! 	xpath_len += 2;
  
  	xmlInitParser();
  
--- 3320,3332 ----
  
  	xml_init();
  
! 	string = (xmlChar *) palloc((len + 8) * sizeof(xmlChar));
  
! 	xpath_expr = (xmlChar *) palloc((xpath_len + 5) * sizeof(xmlChar));
  
! 	memcpy (string, datastr, len);
! 	string[len] = '\0';
! 	
  
  	xmlInitParser();
  
***************
*** 3367,3375 ****
  		xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
  					"could not allocate parser context");
  	doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
! 	if (doc == NULL)
! 		xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
! 					"could not parse XML data");
  	xpathctx = xmlXPathNewContext(doc);
  	if (xpathctx == NULL)
  		xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
--- 3339,3410 ----
  		xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
  					"could not allocate parser context");
  	doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
! 
! 	if (doc == NULL || xmlDocGetRootElement(doc) == NULL)
! 	{
! 
! 		/*
! 		 * In case we have a fragment rather than a well-formed XML document,
! 		 * which has a single root (XML well-formedness), we try again after
! 		 * transforming the xml by stripping away the XML prolog, if any, and
! 		 * wrapping the remainder in a dummy element (<x>...</x>),
! 		 * and later extending the XPath expression accordingly. 
! 		 */
! 		if (len >= 5 &&
! 			xmlStrncmp((xmlChar *) datastr, (xmlChar *) "<?xml", 5) == 0)
! 		{
! 			i = 5;
! 			while (i < len &&
! 				   !(datastr[i - 1] == '?' && datastr[i] == '>'))
! 				i++;
! 			
! 			if (i == len)
! 				xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
! 							"could not parse XML data");
! 			
! 			++i;
! 			
! 			datastr += i;
! 			len -= i;
! 		}
! 
! 		memcpy(string, "<x>", 3);
! 		memcpy(string + 3, datastr, len);
! 		memcpy(string + 3 + len, "</x>", 5);
! 		len += 7;
! 
! 		doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
! 
!  		if (doc == NULL)
! 			xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
! 						"could not parse XML data");
! 
! 		if (*VARDATA(xpath_expr_text) == '/')
! 		{
! 			memcpy(xpath_expr, "/x", 2);
! 			memcpy(xpath_expr + 2, VARDATA(xpath_expr_text), xpath_len);
! 			xpath_expr[xpath_len + 2] = '\0';
! 			xpath_len += 2;
! 		}
! 		else
! 		{
! 			memcpy(xpath_expr, "/x//", 4);
! 			memcpy(xpath_expr + 4, VARDATA(xpath_expr_text), xpath_len);
! 			xpath_expr[xpath_len + 4] = '\0';
! 			xpath_len += 4;
! 		}
! 
! 	}
! 	else
! 	{
! 		/* 
! 		 * if we didn't need to mangle the XML, we don't need to mangle the
! 		 * xpath either.
! 		 */
! 		memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
! 		xpath_expr[xpath_len] = '\0';
! 	}
! 
  	xpathctx = xmlXPathNewContext(doc);
  	if (xpathctx == NULL)
  		xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to