Re: [HACKERS] xpath processing brain dead

Andrew Dunstan Fri, 27 Feb 2009 13:46:33 -0800


Andrew Dunstan wrote:

Tom Lane wrote:
Hmm, does this proposal require adding a test of well-formed-ness to
a code path that doesn't currently have one?  If so, is that likely
to contribute any noticeable slowdown?

I can't offhand see an objection to this other than possible performance
impact.
Yeah, testing the well-formedness might cost a bit. We could short-circuit the test by applying some comparatively fast heuristic tests.
Or we could decide that we'll just fix the xpath prefix part for 8.3 and keep the wrapping. I don't want to spend a huge effort on fixing something I regard as fundamentally broken.
I'll do some tests to see what the cost of extra xml parsing might be.



The extra cost appears to be fairly negligible.

regression=# create table xpathtest3 as select xmlconcat(xmlelement(name unique1, unique1), '\n\t',xmlelement(name unique2, unique2),'\n\t',xmlelement(name two, two), '\n\t',xmlelement(name four,four),'\n\t',xmlelement(name ten,ten),'\n\t',xmlelement(name twenty,twenty),'\n\t',xmlelement(name hundred,hundred),'\n\t',xmlelement(name thousand,thousand),'\n\t',xmlelement(name twothusand,twothousand),'\n\t',xmlelement(name fivethous,fivethous),'\n\t',xmlelement(name tenthous,tenthous),'\n\t',xmlelement(name odd,odd),'\n\t',xmlelement(name even,even),'\n\t',xmlelement(name stringu1,stringu1),'\n\t',xmlelement(name stringu2,stringu2),'\n\t',xmlelement(name string4,string4),'\n') from tenk1;

regression=# select count(*) from (select xpath('//two[text()="0"]/text()',xmlconcat) as elems from xpathtest3, generate_series(1,10) ) x ;
 count
--------
 100000
(1 row)

Time: 27.722 ms

Proposed patch for 8.3 attached. (Note: it only reparses in the non-document case)


cheers

andrew

Index: src/backend/utils/adt/xml.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/xml.c,v
retrieving revision 1.68.2.6
diff -c -r1.68.2.6 xml.c
*** src/backend/utils/adt/xml.c	10 Nov 2008 18:02:27 -0000	1.68.2.6
--- src/backend/utils/adt/xml.c	27 Feb 2009 20:59:28 -0000
***************
*** 3320,3360 ****
  
  	xml_init();
  
! 	/*
! 	 * To handle both documents and fragments, regardless of the fact whether
! 	 * the XML datum has a single root (XML well-formedness), we wrap the XML
! 	 * datum in a dummy element (<x>...</x>) and extend the XPath expression
! 	 * accordingly.  To do it, throw away the XML prolog, if any.
! 	 */
! 	if (len >= 5 &&
! 		xmlStrncmp((xmlChar *) datastr, (xmlChar *) "<?xml", 5) == 0)
! 	{
! 		i = 5;
! 		while (i < len &&
! 			   !(datastr[i - 1] == '?' && datastr[i] == '>'))
! 			i++;
! 
! 		if (i == len)
! 			xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
! 						"could not parse XML data");
  
! 		++i;
  
! 		datastr += i;
! 		len -= i;
! 	}
! 
! 	string = (xmlChar *) palloc((len + 8) * sizeof(xmlChar));
! 	memcpy(string, "<x>", 3);
! 	memcpy(string + 3, datastr, len);
! 	memcpy(string + 3 + len, "</x>", 5);
! 	len += 7;
! 
! 	xpath_expr = (xmlChar *) palloc((xpath_len + 3) * sizeof(xmlChar));
! 	memcpy(xpath_expr, "/x", 2);
! 	memcpy(xpath_expr + 2, VARDATA(xpath_expr_text), xpath_len);
! 	xpath_expr[xpath_len + 2] = '\0';
! 	xpath_len += 2;
  
  	xmlInitParser();
  
--- 3320,3332 ----
  
  	xml_init();
  
! 	string = (xmlChar *) palloc((len + 8) * sizeof(xmlChar));
  
! 	xpath_expr = (xmlChar *) palloc((xpath_len + 5) * sizeof(xmlChar));
  
! 	memcpy (string, datastr, len);
! 	string[len] = '\0';
! 	
  
  	xmlInitParser();
  
***************
*** 3367,3375 ****
  		xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
  					"could not allocate parser context");
  	doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
! 	if (doc == NULL)
! 		xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
! 					"could not parse XML data");
  	xpathctx = xmlXPathNewContext(doc);
  	if (xpathctx == NULL)
  		xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
--- 3339,3410 ----
  		xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
  					"could not allocate parser context");
  	doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
! 
! 	if (doc == NULL || xmlDocGetRootElement(doc) == NULL)
! 	{
! 
! 		/*
! 		 * In case we have a fragment rather than a well-formed XML document,
! 		 * which has a single root (XML well-formedness), we try again after
! 		 * transforming the xml by stripping away the XML prolog, if any, and
! 		 * wrapping the remainder in a dummy element (<x>...</x>),
! 		 * and later extending the XPath expression accordingly. 
! 		 */
! 		if (len >= 5 &&
! 			xmlStrncmp((xmlChar *) datastr, (xmlChar *) "<?xml", 5) == 0)
! 		{
! 			i = 5;
! 			while (i < len &&
! 				   !(datastr[i - 1] == '?' && datastr[i] == '>'))
! 				i++;
! 			
! 			if (i == len)
! 				xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
! 							"could not parse XML data");
! 			
! 			++i;
! 			
! 			datastr += i;
! 			len -= i;
! 		}
! 
! 		memcpy(string, "<x>", 3);
! 		memcpy(string + 3, datastr, len);
! 		memcpy(string + 3 + len, "</x>", 5);
! 		len += 7;
! 
! 		doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
! 
!  		if (doc == NULL)
! 			xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
! 						"could not parse XML data");
! 
! 		if (*VARDATA(xpath_expr_text) == '/')
! 		{
! 			memcpy(xpath_expr, "/x", 2);
! 			memcpy(xpath_expr + 2, VARDATA(xpath_expr_text), xpath_len);
! 			xpath_expr[xpath_len + 2] = '\0';
! 			xpath_len += 2;
! 		}
! 		else
! 		{
! 			memcpy(xpath_expr, "/x//", 4);
! 			memcpy(xpath_expr + 4, VARDATA(xpath_expr_text), xpath_len);
! 			xpath_expr[xpath_len + 4] = '\0';
! 			xpath_len += 4;
! 		}
! 
! 	}
! 	else
! 	{
! 		/* 
! 		 * if we didn't need to mangle the XML, we don't need to mangle the
! 		 * xpath either.
! 		 */
! 		memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
! 		xpath_expr[xpath_len] = '\0';
! 	}
! 
  	xpathctx = xmlXPathNewContext(doc);
  	if (xpathctx == NULL)
  		xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,

-- 
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] xpath processing brain dead

Reply via email to