[
https://issues.apache.org/jira/browse/XERCESJ-766?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Elliotte Rusty Harold updated XERCESJ-766:
------------------------------------------
Description:
The serializer has problems when dealing with mixed content elements:
Sometimes additional whitespace gets inserted, sometimes whitespace
gets ignored:
{noformat}
public class TestXerces {
public static void main( String[] args ) throws Exception {
String inputXML =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"+
"<!DOCTYPE DOC [\n"+
"<!ELEMENT DOC ( P )* >\n"+
"<!ELEMENT B (#PCDATA) >\n"+
"<!ELEMENT P (#PCDATA|B|P)* >\n"+
"]>\n"+
"<DOC>"+
"<P>no more <B>xx</B> "spaces" </P>"+
"<P><B>xx</B> "spaces" </P>"+
"<P><B>xx</B> more spaces </P>"+
"<P><P><B>xx</B> more spaces </P></P>"+
"<P><P><B>xx</B> more spaces </P> </P>"+
"</DOC>";
InputSource source = new InputSource(new StringReader(inputXML));
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DOMParser parser = new DOMParser();
parser.setFeature("http://xml.org/sax/features/validation", true );
parser.parse(source);
Document dom = parser.getDocument();
StringWriter xmlWriter = new StringWriter();
OutputFormat format = new OutputFormat("xml", "ISO-8859-15", false);
format.setIndenting(true);
format.setIndent( 2 );
format.setLineWidth( 65000 );
//format.setPreserveSpace(true);
format.setOmitComments(false);
//format.setStandalone(true);
//format.setOmitXMLDeclaration(true);
//format.setLineSeparator(System.getProperty("line.separator"));
XMLSerializer serializer = new XMLSerializer(xmlWriter, format);
//serializer.setNamespaces(true);
serializer.serialize(dom);
System.out.println(xmlWriter.toString());
}
}
{noformat}
This results in:
<?xml version="1.0" encoding="ISO-8859-15"?>
<!DOCTYPE DOC [<!ELEMENT DOC (P)*>
<!ELEMENT B (#PCDATA)>
<!ELEMENT P (#PCDATA|B|P)*>
]>
<DOC>
<P>no more <B>xx</B> "spaces" </P>
<P>
<B>xx</B> "spaces" </P>
<P>
<B>xx</B> more spaces </P>
<P>
<P>
<B>xx</B> more spaces </P>
</P>
<P>
<P>
<B>xx</B> more spaces </P>
</P>
</DOC>
I also saw another case (which I could not reproduce) where
aaa<B>bbb</B> & xxx
became
aaa<B>bbb</B>& xxx
was:
The serializer has problems when dealing with mixed content elements:
Sometimes additional whitespace gets inserted, sometimes whitespace
gets ignored:
```
public class TestXerces {
public static void main( String[] args ) throws Exception {
String inputXML =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"+
"<!DOCTYPE DOC [\n"+
"<!ELEMENT DOC ( P )* >\n"+
"<!ELEMENT B (#PCDATA) >\n"+
"<!ELEMENT P (#PCDATA|B|P)* >\n"+
"]>\n"+
"<DOC>"+
"<P>no more <B>xx</B> "spaces" </P>"+
"<P><B>xx</B> "spaces" </P>"+
"<P><B>xx</B> more spaces </P>"+
"<P><P><B>xx</B> more spaces </P></P>"+
"<P><P><B>xx</B> more spaces </P> </P>"+
"</DOC>";
InputSource source = new InputSource(new StringReader(inputXML));
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DOMParser parser = new DOMParser();
parser.setFeature("http://xml.org/sax/features/validation", true );
parser.parse(source);
Document dom = parser.getDocument();
StringWriter xmlWriter = new StringWriter();
OutputFormat format = new OutputFormat("xml", "ISO-8859-15", false);
format.setIndenting(true);
format.setIndent( 2 );
format.setLineWidth( 65000 );
//format.setPreserveSpace(true);
format.setOmitComments(false);
//format.setStandalone(true);
//format.setOmitXMLDeclaration(true);
//format.setLineSeparator(System.getProperty("line.separator"));
XMLSerializer serializer = new XMLSerializer(xmlWriter, format);
//serializer.setNamespaces(true);
serializer.serialize(dom);
System.out.println(xmlWriter.toString());
}
}
```
This results in:
<?xml version="1.0" encoding="ISO-8859-15"?>
<!DOCTYPE DOC [<!ELEMENT DOC (P)*>
<!ELEMENT B (#PCDATA)>
<!ELEMENT P (#PCDATA|B|P)*>
]>
<DOC>
<P>no more <B>xx</B> "spaces" </P>
<P>
<B>xx</B> "spaces" </P>
<P>
<B>xx</B> more spaces </P>
<P>
<P>
<B>xx</B> more spaces </P>
</P>
<P>
<P>
<B>xx</B> more spaces </P>
</P>
</DOC>
I also saw another case (which I could not reproduce) where
aaa<B>bbb</B> & xxx
became
aaa<B>bbb</B>& xxx
> unauthorized handling of whitespace when prettyprinting mixed content
> ---------------------------------------------------------------------
>
> Key: XERCESJ-766
> URL: https://issues.apache.org/jira/browse/XERCESJ-766
> Project: Xerces2-J
> Issue Type: Bug
> Components: Serialization
> Affects Versions: 2.4.0
> Environment: Operating System: Linux
> Platform: PC
> Reporter: Holger Klawitter
>
> The serializer has problems when dealing with mixed content elements:
> Sometimes additional whitespace gets inserted, sometimes whitespace
> gets ignored:
> {noformat}
> public class TestXerces {
> public static void main( String[] args ) throws Exception {
> String inputXML =
> "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"+
> "<!DOCTYPE DOC [\n"+
> "<!ELEMENT DOC ( P )* >\n"+
> "<!ELEMENT B (#PCDATA) >\n"+
> "<!ELEMENT P (#PCDATA|B|P)* >\n"+
> "]>\n"+
> "<DOC>"+
> "<P>no more <B>xx</B> "spaces" </P>"+
> "<P><B>xx</B> "spaces" </P>"+
> "<P><B>xx</B> more spaces </P>"+
> "<P><P><B>xx</B> more spaces </P></P>"+
> "<P><P><B>xx</B> more spaces </P> </P>"+
> "</DOC>";
> InputSource source = new InputSource(new StringReader(inputXML));
> DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
> DOMParser parser = new DOMParser();
> parser.setFeature("http://xml.org/sax/features/validation", true );
> parser.parse(source);
> Document dom = parser.getDocument();
> StringWriter xmlWriter = new StringWriter();
> OutputFormat format = new OutputFormat("xml", "ISO-8859-15", false);
> format.setIndenting(true);
> format.setIndent( 2 );
> format.setLineWidth( 65000 );
> //format.setPreserveSpace(true);
> format.setOmitComments(false);
> //format.setStandalone(true);
> //format.setOmitXMLDeclaration(true);
> //format.setLineSeparator(System.getProperty("line.separator"));
> XMLSerializer serializer = new XMLSerializer(xmlWriter, format);
> //serializer.setNamespaces(true);
> serializer.serialize(dom);
> System.out.println(xmlWriter.toString());
> }
> }
> {noformat}
> This results in:
> <?xml version="1.0" encoding="ISO-8859-15"?>
> <!DOCTYPE DOC [<!ELEMENT DOC (P)*>
> <!ELEMENT B (#PCDATA)>
> <!ELEMENT P (#PCDATA|B|P)*>
> ]>
> <DOC>
> <P>no more <B>xx</B> "spaces" </P>
> <P>
> <B>xx</B> "spaces" </P>
> <P>
> <B>xx</B> more spaces </P>
> <P>
> <P>
> <B>xx</B> more spaces </P>
> </P>
> <P>
> <P>
> <B>xx</B> more spaces </P>
> </P>
> </DOC>
> I also got another case (however it didn't reproduce) where
> aaa<B>bbb</B> & xxx
> became
> aaa<B>bbb</B>& xxx
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]