On 28.08.24 10:19, Jim Jones wrote: > Hi, > > While testing a feature reported by Pavel in this thread[1] I realized > that elements with whitespace between them won't be indented with > XMLSERIALIZE( ... INDENT) > > SELECT xmlserialize(DOCUMENT '<foo><bar>42</bar></foo>' AS text INDENT); > > xmlserialize > ----------------- > <foo> + > <bar>42</bar>+ > </foo> + > > (1 row) > > SELECT xmlserialize(DOCUMENT '<foo> <bar>42</bar> </foo>'::xml AS text > INDENT); > xmlserialize > ---------------------------- > <foo> <bar>42</bar> </foo>+ > > (1 row) > > > Other products have a different approach[2] > > Perhaps simply setting the xml_parse parameter > "preserve_whitespace" to false in xmltotext_with_options when XMLSERIALIZE(... INDENT) is used would do the > trick. > > doc = xml_parse(data, xmloption_arg, !indent ? true : false, > GetDatabaseEncoding(), > &parsed_xmloptiontype, &content_nodes, > (Node *) &escontext); > > > (diff attached) > > SELECT xmlserialize(DOCUMENT '<foo> <bar>42</bar> </foo>'::xml AS text > INDENT); > xmlserialize > ----------------- > <foo> + > <bar>42</bar>+ > </foo> + > > (1 row) > > If this is indeed the way to go I can update the regression tests accordingly. > > Best, >
I just created a CF entry for this: https://commitfest.postgresql.org/49/5217/ The attached v1 includes regression tests. -- Jim
From e474cdc457a91e96432f5a14c69b4ecbc0345579 Mon Sep 17 00:00:00 2001 From: Jim Jones <jim.jo...@uni-muenster.de> Date: Thu, 29 Aug 2024 19:41:02 +0200 Subject: [PATCH v1] Bug fix for XMLSERIALIZE(...INDENT) for xml containing blank nodes This fixes a bug that caused XMLSERIALIZE(... INDENT) to leave xml containing blank nodes (whitespace between elements) unindented. It sets xml_parse's parameter 'preserve_whitespace' to false if the INDENT flag is used in XMLSERIALIZE. New regression tests are also included. --- src/backend/utils/adt/xml.c | 10 ++++++++-- src/test/regress/expected/xml.out | 19 +++++++++++++++++++ src/test/regress/expected/xml_1.out | 11 +++++++++++ src/test/regress/expected/xml_2.out | 19 +++++++++++++++++++ src/test/regress/sql/xml.sql | 3 +++ 5 files changed, 60 insertions(+), 2 deletions(-) diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 447e72b21e..1cd4929870 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -677,8 +677,14 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent) } #ifdef USE_LIBXML - /* Parse the input according to the xmloption */ - doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(), + /* + * Parse the input according to the xmloption + * preserve_whitespace is set to false in case the function should + * return an indented xml, otherwise libxml2 will ignore the elements + * that contain whitespaces between them. + */ + doc = xml_parse(data, xmloption_arg, !indent ? 
true : false, + GetDatabaseEncoding(), &parsed_xmloptiontype, &content_nodes, (Node *) &escontext); if (doc == NULL || escontext.error_occurred) diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out index 93a79cda8f..6f073101a1 100644 --- a/src/test/regress/expected/xml.out +++ b/src/test/regress/expected/xml.out @@ -663,6 +663,25 @@ SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text t (1 row) +-- indent xml strings containing blank nodes +SELECT xmlserialize(DOCUMENT '<foo> <bar></bar> </foo>' AS text INDENT); + xmlserialize +-------------- + <foo> + + <bar/> + + </foo> + + +(1 row) + +SELECT xmlserialize(CONTENT 'text node<foo> <bar></bar> </foo>' AS text INDENT); + xmlserialize +-------------- + text node + + <foo> + + <bar/> + + </foo> +(1 row) + SELECT xml '<foo>bar</foo>' IS DOCUMENT; ?column? ---------- diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out index 9323b84ae2..d26e10441e 100644 --- a/src/test/regress/expected/xml_1.out +++ b/src/test/regress/expected/xml_1.out @@ -443,6 +443,17 @@ ERROR: unsupported XML feature LINE 1: SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val><... ^ DETAIL: This functionality requires the server to be built with libxml support. +-- indent xml strings containing blank nodes +SELECT xmlserialize(DOCUMENT '<foo> <bar></bar> </foo>' AS text INDENT); +ERROR: unsupported XML feature +LINE 1: SELECT xmlserialize(DOCUMENT '<foo> <bar></bar> </foo>'... + ^ +DETAIL: This functionality requires the server to be built with libxml support. +SELECT xmlserialize(CONTENT 'text node<foo> <bar></bar> </foo>' AS text INDENT); +ERROR: unsupported XML feature +LINE 1: SELECT xmlserialize(CONTENT 'text node<foo> <bar></bar> ... + ^ +DETAIL: This functionality requires the server to be built with libxml support. 
SELECT xml '<foo>bar</foo>' IS DOCUMENT; ERROR: unsupported XML feature LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT; diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out index f956322c69..7b154da4ba 100644 --- a/src/test/regress/expected/xml_2.out +++ b/src/test/regress/expected/xml_2.out @@ -649,6 +649,25 @@ SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text t (1 row) +-- indent xml strings containing blank nodes +SELECT xmlserialize(DOCUMENT '<foo> <bar></bar> </foo>' AS text INDENT); + xmlserialize +-------------- + <foo> + + <bar/> + + </foo> + + +(1 row) + +SELECT xmlserialize(CONTENT 'text node<foo> <bar></bar> </foo>' AS text INDENT); + xmlserialize +-------------- + text node + + <foo> + + <bar/> + + </foo> +(1 row) + SELECT xml '<foo>bar</foo>' IS DOCUMENT; ?column? ---------- diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql index 953bac09e4..f752ecb142 100644 --- a/src/test/regress/sql/xml.sql +++ b/src/test/regress/sql/xml.sql @@ -168,6 +168,9 @@ SELECT xmlserialize(CONTENT '<foo><bar></bar></foo>' AS text INDENT); -- 'no indent' = not using 'no indent' SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT); SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT); +-- indent xml strings containing blank nodes +SELECT xmlserialize(DOCUMENT '<foo> <bar></bar> </foo>' AS text INDENT); +SELECT xmlserialize(CONTENT 'text node<foo> <bar></bar> </foo>' AS text INDENT); SELECT xml '<foo>bar</foo>' IS DOCUMENT; SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT; -- 2.34.1