Andrew Dunstan wrote:
Tom Lane wrote:
Hmm, does this proposal require adding a test of well-formed-ness to
a code path that doesn't currently have one? If so, is that likely
to contribute any noticeable slowdown?
I can't offhand see an objection to this other than possible performance
impact.
Yeah, testing the well-formedness might cost a bit. We could
short-circuit the test by applying some comparatively fast heuristic
tests.
Or we could decide that we'll just fix the xpath prefix part for 8.3
and keep the wrapping. I don't want to spend a huge effort on fixing
something I regard as fundamentally broken.
I'll do some tests to see what the cost of extra xml parsing might be.
The extra cost appears to be fairly negligible.
regression=# create table xpathtest3 as select xmlconcat(xmlelement(name
unique1, unique1), '\n\t',xmlelement(name unique2, unique2),
'\n\t',xmlelement(name two, two), '\n\t',xmlelement(name four,
four),'\n\t',xmlelement(name ten,ten),'\n\t',xmlelement(name
twenty,twenty),'\n\t',xmlelement(name
hundred,hundred),'\n\t',xmlelement(name
thousand,thousand),'\n\t',xmlelement(name
twothusand,twothousand),'\n\t',xmlelement(name
fivethous,fivethous),'\n\t',xmlelement(name
tenthous,tenthous),'\n\t',xmlelement(name
odd,odd),'\n\t',xmlelement(name even,even),'\n\t',xmlelement(name
stringu1,stringu1),'\n\t',xmlelement(name
stringu2,stringu2),'\n\t',xmlelement(name string4,string4),'\n') from tenk1;
regression=# select count(*) from (select
xpath('//two[text()="0"]/text()',xmlconcat) as elems from xpathtest3,
generate_series(1,10) ) x ;
count
--------
100000
(1 row)
Time: 27.722 ms
Proposed patch for 8.3 attached. (Note: it only reparses in the
non-document case)
cheers
andrew
Index: src/backend/utils/adt/xml.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/xml.c,v
retrieving revision 1.68.2.6
diff -c -r1.68.2.6 xml.c
*** src/backend/utils/adt/xml.c 10 Nov 2008 18:02:27 -0000 1.68.2.6
--- src/backend/utils/adt/xml.c 27 Feb 2009 20:59:28 -0000
***************
*** 3320,3360 ****
xml_init();
! /*
! * To handle both documents and fragments, regardless of the fact whether
! * the XML datum has a single root (XML well-formedness), we wrap the XML
! * datum in a dummy element (<x>...</x>) and extend the XPath expression
! * accordingly. To do it, throw away the XML prolog, if any.
! */
! if (len >= 5 &&
! xmlStrncmp((xmlChar *) datastr, (xmlChar *) "<?xml", 5) == 0)
! {
! i = 5;
! while (i < len &&
! !(datastr[i - 1] == '?' && datastr[i] == '>'))
! i++;
!
! if (i == len)
! xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
! "could not parse XML data");
! ++i;
! datastr += i;
! len -= i;
! }
!
! string = (xmlChar *) palloc((len + 8) * sizeof(xmlChar));
! memcpy(string, "<x>", 3);
! memcpy(string + 3, datastr, len);
! memcpy(string + 3 + len, "</x>", 5);
! len += 7;
!
! xpath_expr = (xmlChar *) palloc((xpath_len + 3) * sizeof(xmlChar));
! memcpy(xpath_expr, "/x", 2);
! memcpy(xpath_expr + 2, VARDATA(xpath_expr_text), xpath_len);
! xpath_expr[xpath_len + 2] = '\0';
! xpath_len += 2;
xmlInitParser();
--- 3320,3332 ----
xml_init();
! string = (xmlChar *) palloc((len + 8) * sizeof(xmlChar));
! xpath_expr = (xmlChar *) palloc((xpath_len + 5) * sizeof(xmlChar));
! memcpy (string, datastr, len);
! string[len] = '\0';
!
xmlInitParser();
***************
*** 3367,3375 ****
xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate parser context");
doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
! if (doc == NULL)
! xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
! "could not parse XML data");
xpathctx = xmlXPathNewContext(doc);
if (xpathctx == NULL)
xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
--- 3339,3410 ----
xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate parser context");
doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
!
! if (doc == NULL || xmlDocGetRootElement(doc) == NULL)
! {
!
! /*
! * In case we have a fragment rather than a well-formed XML document,
! * which has a single root (XML well-formedness), we try again after
! * transforming the xml by stripping away the XML prolog, if any, and
! * wrapping the remainder in a dummy element (<x>...</x>),
! * and later extending the XPath expression accordingly.
! */
! if (len >= 5 &&
! xmlStrncmp((xmlChar *) datastr, (xmlChar *) "<?xml", 5) == 0)
! {
! i = 5;
! while (i < len &&
! !(datastr[i - 1] == '?' && datastr[i] == '>'))
! i++;
!
! if (i == len)
! xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
! "could not parse XML data");
!
! ++i;
!
! datastr += i;
! len -= i;
! }
!
! memcpy(string, "<x>", 3);
! memcpy(string + 3, datastr, len);
! memcpy(string + 3 + len, "</x>", 5);
! len += 7;
!
! doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
!
! if (doc == NULL)
! xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
! "could not parse XML data");
!
! if (*VARDATA(xpath_expr_text) == '/')
! {
! memcpy(xpath_expr, "/x", 2);
! memcpy(xpath_expr + 2, VARDATA(xpath_expr_text), xpath_len);
! xpath_expr[xpath_len + 2] = '\0';
! xpath_len += 2;
! }
! else
! {
! memcpy(xpath_expr, "/x//", 4);
! memcpy(xpath_expr + 4, VARDATA(xpath_expr_text), xpath_len);
! xpath_expr[xpath_len + 4] = '\0';
! xpath_len += 4;
! }
!
! }
! else
! {
! /*
! * if we didn't need to mangle the XML, we don't need to mangle the
! * xpath either.
! */
! memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
! xpath_expr[xpath_len] = '\0';
! }
!
xpathctx = xmlXPathNewContext(doc);
if (xpathctx == NULL)
xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers