2014-11-05 21:50 GMT+07:00 Ali Akbar <the.ap...@gmail.com>: > 2014-11-04 22:16 GMT+07:00 Peter Eisentraut <pete...@gmx.net>: > >> I think the problem this patch is addressing is real, and your approach >>> is sound, but I'd ask you to go back to the xmlCopyNode() version, and >>> try to add a test case for why the second argument = 1 is necessary. I >>> don't see any other problems. >>> >> >> OK. Because upstream code is fixed in current version, i'll revert to the >> previous version. Test case added to regression test. With =1 argument, the >> result is correct: >> <local:piece xmlns:local=\"http://127.0.0.1\" xmlns=\"http://127.0.0.2\" >> id=\"1\"> >> <internal>number one</internal> >> <internal2/> >> </local:piece> >> >> without the argument, the result is not correct, all children will be >> lost. Because of that, the other regression test will fail too because the >> children is not copied: >> *** 584,593 **** >> >> -- Text XPath expressions evaluation >> SELECT xpath('/value', data) FROM xmltest; >> ! xpath >> ! ---------------------- >> ! {<value>one</value>} >> ! {<value>two</value>} >> (2 rows) >> >> SELECT xpath(NULL, NULL) IS NULL FROM xmltest; >> --- 584,593 ---- >> >> -- Text XPath expressions evaluation >> SELECT xpath('/value', data) FROM xmltest; >> ! xpath >> ! ------------ >> ! {<value/>} >> ! {<value/>} >> (2 rows) >> >> SELECT xpath(NULL, NULL) IS NULL FROM xmltest; >> *************** >> ... <cut> >> >> updated patch attached. >> > > I noticed somewhat big performance regression if the xml is large (i use > PRODML Product Volume sample from energistics.org): > * Without patch (tested 3 times): > select unnest(xpath('//a:flow', x, ARRAY[['a',' > http://www.prodml.org/schemas/1series']])) from u; > > unnest > > ----------------------------------------------------------------------------------------------- > <flow> > + > <kind>gas > lift</kind> + > ... > Time: 84,012 ms > Time: 85,683 ms > Time: 88,766 ms > > > * With latest v6 patch (notice the correct result with namespace > definition): > > select unnest(xpath('//a:flow', x, ARRAY[['a',' > http://www.prodml.org/schemas/1series']])) from u; > > unnest > > ----------------------------------------------------------------------------------------------- > <flow xmlns="http://www.prodml.org/schemas/1series"> > + > ... > Time: 108,912 ms > Time: 108,267 ms > Time: 114,848 ms > > > It's 23% performance regression. > > * Just curious, i'm also testing v5 patch performance (notice the > namespace in the result): > select unnest(xpath('//a:flow', x, ARRAY[['a',' > http://www.prodml.org/schemas/1series']])) from u; > > unnest > > ----------------------------------------------------------------------------------------------- > <flow xmlns="http://www.prodml.org/schemas/1series"> > + > <kind>gas > lift</kind> + > Time: 92,552 ms > Time: 97,440 ms > Time: 99,309 ms > > The regression is only 13%. I know the xmlCopyNode() version (v6 patch) is > much more cleaner than v5patch, should we consider the performance benefit? > > Anyway, thanks for the review! :) >
commit bac2739 in master by Tom Lane changes *astate definition in xml_xpathobjtoxmlarray, this attached v7 patch rebases v6 patch with latest master. For performance comparison, i also rebased the v5 patch attached with name v5-141126.patch -- Ali Akbar
*** a/src/backend/utils/adt/xml.c --- b/src/backend/utils/adt/xml.c *************** *** 141,149 **** static bool print_xml_decl(StringInfo buf, const xmlChar *version, pg_enc encoding, int standalone); static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, int encoding); ! static text *xml_xmlnodetoxmltype(xmlNodePtr cur); static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, ! ArrayBuildState *astate); #endif /* USE_LIBXML */ static StringInfo query_to_xml_internal(const char *query, char *tablename, --- 141,151 ---- pg_enc encoding, int standalone); static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, int encoding); ! static text *xml_xmlnodetoxmltype(xmlNodePtr cur, ! PgXmlErrorContext *xmlerrcxt); static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, ! ArrayBuildState *astate, ! PgXmlErrorContext *xmlerrcxt); #endif /* USE_LIBXML */ static StringInfo query_to_xml_internal(const char *query, char *tablename, *************** *** 3594,3620 **** SPI_sql_row_to_xmlelement(int rownum, StringInfo result, char *tablename, #ifdef USE_LIBXML /* * Convert XML node to text (dump subtree in case of element, * return value otherwise) */ static text * ! xml_xmlnodetoxmltype(xmlNodePtr cur) { xmltype *result; if (cur->type == XML_ELEMENT_NODE) { xmlBufferPtr buf; buf = xmlBufferCreate(); PG_TRY(); { xmlNodeDump(buf, NULL, cur, 0, 1); result = xmlBuffer_to_xmltype(buf); } PG_CATCH(); { xmlBufferFree(buf); PG_RE_THROW(); } --- 3596,3706 ---- #ifdef USE_LIBXML + /* check ns definition of node and its childrens. If any one of ns is + * not defined in node and it's children, but in the node's parent, + * copy the definition to node. + */ + static void + xml_checkandcopyns(xmlNodePtr root, + PgXmlErrorContext *xmlerrcxt, + xmlNodePtr node, + xmlNsPtr lastns_before) + { + xmlNsPtr ns = NULL; + xmlNsPtr cur_ns; + xmlNodePtr cur; + + if (node->ns != NULL) + { + /* check in new nses */ + cur_ns = lastns_before == NULL ? node->nsDef : lastns_before->next; + while (cur_ns != NULL) + { + if (cur_ns->href != NULL) + { + if (((cur_ns->prefix == NULL) && (node->ns->prefix == NULL)) || + ((cur_ns->prefix != NULL) && (node->ns->prefix != NULL) && + xmlStrEqual(cur_ns->prefix, node->ns->prefix))) + { + ns = cur_ns; + break; + } + } + cur_ns = cur_ns->next; + } + if (ns == NULL) /* not in new nses */ + { + ns = xmlSearchNs(NULL, node->parent, node->ns->prefix); + + if (ns != NULL) + { + ns = xmlNewNs(root, ns->href, ns->prefix); + + if (ns == NULL && xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlNs"); + } + } + } + /* check and copy ns for children recursively */ + cur = node->children; + while (cur != NULL) + { + xml_checkandcopyns(root, xmlerrcxt, cur, lastns_before); + cur = cur->next; + } + } + /* * Convert XML node to text (dump subtree in case of element, * return value otherwise) */ static text * ! xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt) { xmltype *result; if (cur->type == XML_ELEMENT_NODE) { xmlBufferPtr buf; + xmlNsPtr lastns_before; + xmlNsPtr ns; + xmlNsPtr next; buf = xmlBufferCreate(); + PG_TRY(); { + lastns_before = cur->nsDef; + if (lastns_before != NULL) + { + while (lastns_before->next != NULL) + lastns_before = lastns_before->next; + } + xml_checkandcopyns(cur, xmlerrcxt, cur, lastns_before); + xmlNodeDump(buf, NULL, cur, 0, 1); result = xmlBuffer_to_xmltype(buf); + + /* delete and free new nses */ + ns = lastns_before == NULL ? cur->nsDef : lastns_before->next; + while (ns != NULL) + { + next = ns->next; + xmlFree(ns); + ns = next; + } + + if (lastns_before == NULL) + cur->nsDef = NULL; + else + lastns_before->next = NULL; } PG_CATCH(); { + /* new namespaces will be freed while free-ing the node, so we + * won't free it here + */ xmlBufferFree(buf); PG_RE_THROW(); } *************** *** 3660,3666 **** xml_xmlnodetoxmltype(xmlNodePtr cur) */ static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, ! ArrayBuildState *astate) { int result = 0; Datum datum; --- 3746,3753 ---- */ static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, ! ArrayBuildState *astate, ! PgXmlErrorContext *xmlerrcxt) { int result = 0; Datum datum; *************** *** 3679,3685 **** xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, for (i = 0; i < result; i++) { ! datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i])); (void) accumArrayResult(astate, datum, false, XMLOID, CurrentMemoryContext); } --- 3766,3773 ---- for (i = 0; i < result; i++) { ! datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i], ! xmlerrcxt)); (void) accumArrayResult(astate, datum, false, XMLOID, CurrentMemoryContext); } *************** *** 3881,3889 **** xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, * Extract the results as requested. */ if (res_nitems != NULL) ! *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate); else ! (void) xml_xpathobjtoxmlarray(xpathobj, astate); } PG_CATCH(); { --- 3969,3977 ---- * Extract the results as requested. */ if (res_nitems != NULL) ! *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt); else ! (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt); } PG_CATCH(); { *** a/src/test/regress/expected/xml.out --- b/src/test/regress/expected/xml.out *************** *** 612,617 **** SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><loc --- 612,632 ---- {1,2} (1 row) + SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); + xpath + ------------------------------------------------------------------------------------------------------------------------------------------------ + {"<local:piece xmlns:local=\"http://127.0.0.1\" id=\"1\">number one</local:piece>","<local:piece xmlns:local=\"http://127.0.0.1\" id=\"2\"/>"} + (1 row) + + SELECT xpath('//loc:*', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); + xpath + -------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"<local:data xmlns:local=\"http://127.0.0.1\"> + + <local:piece id=\"1\">number one</local:piece> + + <local:piece id=\"2\"/> + + </local:data>","<local:piece xmlns:local=\"http://127.0.0.1\" id=\"1\">number one</local:piece>","<local:piece xmlns:local=\"http://127.0.0.1\" id=\"2\"/>"} + (1 row) + SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'); xpath ------------------------- *** a/src/test/regress/expected/xml_1.out --- b/src/test/regress/expected/xml_1.out *************** *** 514,519 **** LINE 1: SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="ht... --- 514,531 ---- ^ DETAIL: This functionality requires the server to be built with libxml support. HINT: You need to rebuild PostgreSQL using --with-libxml. + SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); + ERROR: unsupported XML feature + LINE 1: SELECT xpath('//loc:piece', '<local:data xmlns:local="http:/... + ^ + DETAIL: This functionality requires the server to be built with libxml support. + HINT: You need to rebuild PostgreSQL using --with-libxml. + SELECT xpath('//loc:*', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); + ERROR: unsupported XML feature + LINE 1: SELECT xpath('//loc:*', '<local:data xmlns:local="http://127... + ^ + DETAIL: This functionality requires the server to be built with libxml support. + HINT: You need to rebuild PostgreSQL using --with-libxml. SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'); ERROR: unsupported XML feature LINE 1: SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'... *** a/src/test/regress/sql/xml.sql --- b/src/test/regress/sql/xml.sql *************** *** 178,183 **** SELECT xpath(NULL, NULL) IS NULL FROM xmltest; --- 178,185 ---- SELECT xpath('', '<!-- error -->'); SELECT xpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>'); SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); + SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); + SELECT xpath('//loc:*', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'); SELECT xpath('//text()', '<root><</root>'); SELECT xpath('//@value', '<root value="<"/>');
*** a/src/backend/utils/adt/xml.c --- b/src/backend/utils/adt/xml.c *************** *** 141,149 **** static bool print_xml_decl(StringInfo buf, const xmlChar *version, pg_enc encoding, int standalone); static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, int encoding); ! static text *xml_xmlnodetoxmltype(xmlNodePtr cur); static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, ! ArrayBuildState *astate); #endif /* USE_LIBXML */ static StringInfo query_to_xml_internal(const char *query, char *tablename, --- 141,151 ---- pg_enc encoding, int standalone); static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, int encoding); ! static text *xml_xmlnodetoxmltype(xmlNodePtr cur, ! PgXmlErrorContext *xmlerrcxt); static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, ! ArrayBuildState *astate, ! PgXmlErrorContext *xmlerrcxt); #endif /* USE_LIBXML */ static StringInfo query_to_xml_internal(const char *query, char *tablename, *************** *** 3599,3624 **** SPI_sql_row_to_xmlelement(int rownum, StringInfo result, char *tablename, * return value otherwise) */ static text * ! xml_xmlnodetoxmltype(xmlNodePtr cur) { xmltype *result; if (cur->type == XML_ELEMENT_NODE) { xmlBufferPtr buf; buf = xmlBufferCreate(); PG_TRY(); { ! xmlNodeDump(buf, NULL, cur, 0, 1); result = xmlBuffer_to_xmltype(buf); } PG_CATCH(); { xmlBufferFree(buf); PG_RE_THROW(); } PG_END_TRY(); xmlBufferFree(buf); } else --- 3601,3640 ---- * return value otherwise) */ static text * ! xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt) { xmltype *result; if (cur->type == XML_ELEMENT_NODE) { xmlBufferPtr buf; + xmlNodePtr cur_copy; buf = xmlBufferCreate(); + + /* the result of xmlNodeDump won't contain namespace definitions + * from parent nodes, but xmlCopyNode duplicates a node along + * with its required namespace definitions. + */ + cur_copy = xmlCopyNode(cur, 1); + + if (cur_copy == NULL) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not serialize xml"); + PG_TRY(); { ! xmlNodeDump(buf, NULL, cur_copy, 0, 1); result = xmlBuffer_to_xmltype(buf); } PG_CATCH(); { + xmlFreeNode(cur_copy); xmlBufferFree(buf); PG_RE_THROW(); } PG_END_TRY(); + xmlFreeNode(cur_copy); xmlBufferFree(buf); } else *************** *** 3660,3666 **** xml_xmlnodetoxmltype(xmlNodePtr cur) */ static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, ! ArrayBuildState *astate) { int result = 0; Datum datum; --- 3676,3683 ---- */ static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, ! ArrayBuildState *astate, ! PgXmlErrorContext *xmlerrcxt) { int result = 0; Datum datum; *************** *** 3679,3685 **** xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, for (i = 0; i < result; i++) { ! datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i])); (void) accumArrayResult(astate, datum, false, XMLOID, CurrentMemoryContext); } --- 3696,3703 ---- for (i = 0; i < result; i++) { ! datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i], ! xmlerrcxt)); (void) accumArrayResult(astate, datum, false, XMLOID, CurrentMemoryContext); } *************** *** 3881,3889 **** xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, * Extract the results as requested. */ if (res_nitems != NULL) ! *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate); else ! (void) xml_xpathobjtoxmlarray(xpathobj, astate); } PG_CATCH(); { --- 3899,3907 ---- * Extract the results as requested. */ if (res_nitems != NULL) ! *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt); else ! (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt); } PG_CATCH(); { *** a/src/test/regress/expected/xml.out --- b/src/test/regress/expected/xml.out *************** *** 612,617 **** SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><loc --- 612,623 ---- {1,2} (1 row) + SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); + xpath + ------------------------------------------------------------------------------------------------------------------------------------------------ + {"<local:piece xmlns:local=\"http://127.0.0.1\" id=\"1\">number one</local:piece>","<local:piece xmlns:local=\"http://127.0.0.1\" id=\"2\"/>"} + (1 row) + SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'); xpath ------------------------- *** a/src/test/regress/expected/xml_1.out --- b/src/test/regress/expected/xml_1.out *************** *** 514,519 **** LINE 1: SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="ht... --- 514,531 ---- ^ DETAIL: This functionality requires the server to be built with libxml support. HINT: You need to rebuild PostgreSQL using --with-libxml. + SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); + ERROR: unsupported XML feature + LINE 1: SELECT xpath('//loc:piece', '<local:data xmlns:local="http:/... + ^ + DETAIL: This functionality requires the server to be built with libxml support. + HINT: You need to rebuild PostgreSQL using --with-libxml. + SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1" xmlns="http://127.0.0.2"><local:piece id="1"><internal>number one</internal><internal2/></local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); + ERROR: unsupported XML feature + LINE 1: SELECT xpath('//loc:piece', '<local:data xmlns:local="http:/... + ^ + DETAIL: This functionality requires the server to be built with libxml support. + HINT: You need to rebuild PostgreSQL using --with-libxml. SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'); ERROR: unsupported XML feature LINE 1: SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'... *** a/src/test/regress/sql/xml.sql --- b/src/test/regress/sql/xml.sql *************** *** 178,183 **** SELECT xpath(NULL, NULL) IS NULL FROM xmltest; --- 178,185 ---- SELECT xpath('', '<!-- error -->'); SELECT xpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>'); SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); + SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); + SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1" xmlns="http://127.0.0.2"><local:piece id="1"><internal>number one</internal><internal2/></local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'); SELECT xpath('//text()', '<root><</root>'); SELECT xpath('//@value', '<root value="<"/>');
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers