On 28.08.24 10:19, Jim Jones wrote:
> Hi,
>
> While testing a feature reported by Pavel in this thread[1] I realized
> that elements with whitespace between them are not indented by
> XMLSERIALIZE(... INDENT):
>
> SELECT xmlserialize(DOCUMENT '<foo><bar>42</bar></foo>' AS text INDENT);
>
>   xmlserialize   
> -----------------
>  <foo>          +
>    <bar>42</bar>+
>  </foo>         +
>  
> (1 row)
>
> SELECT xmlserialize(DOCUMENT '<foo> <bar>42</bar> </foo>'::xml AS text
> INDENT);
>         xmlserialize        
> ----------------------------
>  <foo> <bar>42</bar> </foo>+
>  
> (1 row)
>
>
> Other products have a different approach[2]
>
> Perhaps simply setting xml_parse's parameter "preserve_whitespace" to
> false in xmltotext_with_options when XMLSERIALIZE(.. INDENT) is used
> would do the trick.
>
> doc = xml_parse(data, xmloption_arg, !indent ? true : false,
>                       GetDatabaseEncoding(),
>                       &parsed_xmloptiontype, &content_nodes,
>                       (Node *) &escontext);
>
>
> (diff attached)
>
> SELECT xmlserialize(DOCUMENT '<foo> <bar>42</bar> </foo>'::xml AS text
> INDENT);
>   xmlserialize   
> -----------------
>  <foo>          +
>    <bar>42</bar>+
>  </foo>         +
>  
> (1 row)
>
> If this is indeed the way to go I can update the regression tests accordingly.
>
> Best,
>

Just created a CF entry for this: https://commitfest.postgresql.org/49/5217/
v1 attached includes regression tests.

-- 
Jim
From e474cdc457a91e96432f5a14c69b4ecbc0345579 Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jo...@uni-muenster.de>
Date: Thu, 29 Aug 2024 19:41:02 +0200
Subject: [PATCH v1] Bug fix for XMLSERIALIZE(...INDENT) for xml containing
 blank nodes

This fixes a bug that caused XMLSERIALIZE(... INDENT) to leave xml
containing blank nodes unindented. It sets xml_parse's parameter
'preserve_whitespace' to false if the INDENT flag was used
in XMLSERIALIZE. New regression tests are also included.
---
 src/backend/utils/adt/xml.c         | 10 ++++++++--
 src/test/regress/expected/xml.out   | 19 +++++++++++++++++++
 src/test/regress/expected/xml_1.out | 11 +++++++++++
 src/test/regress/expected/xml_2.out | 19 +++++++++++++++++++
 src/test/regress/sql/xml.sql        |  3 +++
 5 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 447e72b21e..1cd4929870 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -677,8 +677,14 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 #ifdef USE_LIBXML
-	/* Parse the input according to the xmloption */
-	doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
+	/*
+	 * Parse the input according to the xmloption.  preserve_whitespace is
+	 * set to false when indented output is requested; otherwise libxml2
+	 * will not indent elements that have whitespace-only text nodes
+	 * between them.
+	 */
+	doc = xml_parse(data, xmloption_arg, !indent ? true : false,
+					GetDatabaseEncoding(),
 					&parsed_xmloptiontype, &content_nodes,
 					(Node *) &escontext);
 	if (doc == NULL || escontext.error_occurred)
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 93a79cda8f..6f073101a1 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -663,6 +663,25 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- indent xml strings containing blank nodes
+SELECT xmlserialize(DOCUMENT '<foo>   <bar></bar>    </foo>' AS text INDENT);
+ xmlserialize 
+--------------
+ <foo>       +
+   <bar/>    +
+ </foo>      +
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT  'text node<foo>    <bar></bar>   </foo>' AS text INDENT);
+ xmlserialize 
+--------------
+ text node   +
+ <foo>       +
+   <bar/>    +
+ </foo>
+(1 row)
+
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 9323b84ae2..d26e10441e 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -443,6 +443,17 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val><...
                                      ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- indent xml strings containing blank nodes
+SELECT xmlserialize(DOCUMENT '<foo>   <bar></bar>    </foo>' AS text INDENT);
+ERROR:  unsupported XML feature
+LINE 1: SELECT xmlserialize(DOCUMENT '<foo>   <bar></bar>    </foo>'...
+                                     ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlserialize(CONTENT  'text node<foo>    <bar></bar>   </foo>' AS text INDENT);
+ERROR:  unsupported XML feature
+LINE 1: SELECT xmlserialize(CONTENT  'text node<foo>    <bar></bar> ...
+                                     ^
+DETAIL:  This functionality requires the server to be built with libxml support.
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 ERROR:  unsupported XML feature
 LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index f956322c69..7b154da4ba 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -649,6 +649,25 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- indent xml strings containing blank nodes
+SELECT xmlserialize(DOCUMENT '<foo>   <bar></bar>    </foo>' AS text INDENT);
+ xmlserialize 
+--------------
+ <foo>       +
+   <bar/>    +
+ </foo>      +
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT  'text node<foo>    <bar></bar>   </foo>' AS text INDENT);
+ xmlserialize 
+--------------
+ text node   +
+ <foo>       +
+   <bar/>    +
+ </foo>
+(1 row)
+
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index 953bac09e4..f752ecb142 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -168,6 +168,9 @@ SELECT xmlserialize(CONTENT  '<foo><bar></bar></foo>' AS text INDENT);
 -- 'no indent' = not using 'no indent'
 SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
 SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
+-- indent xml strings containing blank nodes
+SELECT xmlserialize(DOCUMENT '<foo>   <bar></bar>    </foo>' AS text INDENT);
+SELECT xmlserialize(CONTENT  'text node<foo>    <bar></bar>   </foo>' AS text INDENT);
 
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
-- 
2.34.1

Reply via email to