[ 
https://issues.apache.org/jira/browse/XERCESJ-766?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Elliotte Rusty Harold updated XERCESJ-766:
------------------------------------------
    Description: 
The serializer has problems when dealing with mixed content elements:
Sometimes additional whitespace gets inserted, sometimes whitespace
gets ignored:


```
public class TestXerces {
  public static void main( String[] args ) throws Exception {
    String inputXML =
      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"+
      "<!DOCTYPE DOC [\n"+
      "<!ELEMENT DOC ( P )* >\n"+
      "<!ELEMENT B (#PCDATA) >\n"+
      "<!ELEMENT P (#PCDATA|B|P)* >\n"+
      "]>\n"+
      "<DOC>"+
      "<P>no more <B>xx</B> &quot;spaces&quot; </P>"+
      "<P><B>xx</B> &quot;spaces&quot; </P>"+
      "<P><B>xx</B> more spaces </P>"+
      "<P><P><B>xx</B> more spaces </P></P>"+
      "<P><P><B>xx</B> more spaces </P> </P>"+
      "</DOC>";
    InputSource source = new InputSource(new StringReader(inputXML));
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    DOMParser parser = new DOMParser();
    parser.setFeature("http://xml.org/sax/features/validation", true );
    parser.parse(source);
    Document dom = parser.getDocument();
    StringWriter xmlWriter = new StringWriter();
    OutputFormat format = new OutputFormat("xml", "ISO-8859-15", false);
    format.setIndenting(true);
    format.setIndent( 2 );
    format.setLineWidth( 65000 );
    //format.setPreserveSpace(true);
    format.setOmitComments(false);
    //format.setStandalone(true);
    //format.setOmitXMLDeclaration(true);
    //format.setLineSeparator(System.getProperty("line.separator"));
    XMLSerializer serializer = new XMLSerializer(xmlWriter, format);
    //serializer.setNamespaces(true);
    serializer.serialize(dom);
    System.out.println(xmlWriter.toString());
  }
}
```

This results in:
<?xml version="1.0" encoding="ISO-8859-15"?>
<!DOCTYPE DOC [<!ELEMENT DOC (P)*>
<!ELEMENT B (#PCDATA)>
<!ELEMENT P (#PCDATA|B|P)*>
]>
<DOC>
  <P>no more <B>xx</B> &quot;spaces&quot; </P>
  <P>
    <B>xx</B> &quot;spaces&quot; </P>
  <P>
    <B>xx</B> more spaces </P>
  <P>
    <P>
      <B>xx</B> more spaces </P>
  </P>
  <P>
    <P>
      <B>xx</B> more spaces </P>
  </P>
</DOC>

I also got another case (however it didn't reproduce) where
 aaa<B>bbb</B> &amp; xxx
became 
 aaa<B>bbb</B>&amp; xxx

  was:
The serializer has problems when dealing with mixed content elements:
Sometimes additional whitespace gets inserted, sometimes whitespace
gets ignored:



public class TestXerces {
  public static void main( String[] args ) throws Exception {
    String inputXML =
      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"+
      "<!DOCTYPE DOC [\n"+
      "<!ELEMENT DOC ( P )* >\n"+
      "<!ELEMENT B (#PCDATA) >\n"+
      "<!ELEMENT P (#PCDATA|B|P)* >\n"+
      "]>\n"+
      "<DOC>"+
      "<P>no more <B>xx</B> &quot;spaces&quot; </P>"+
      "<P><B>xx</B> &quot;spaces&quot; </P>"+
      "<P><B>xx</B> more spaces </P>"+
      "<P><P><B>xx</B> more spaces </P></P>"+
      "<P><P><B>xx</B> more spaces </P> </P>"+
      "</DOC>";
    InputSource source = new InputSource(new StringReader(inputXML));
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    DOMParser parser = new DOMParser();
    parser.setFeature("http://xml.org/sax/features/validation", true );
    parser.parse(source);
    Document dom = parser.getDocument();
    StringWriter xmlWriter = new StringWriter();
    OutputFormat format = new OutputFormat("xml", "ISO-8859-15", false);
    format.setIndenting(true);
    format.setIndent( 2 );
    format.setLineWidth( 65000 );
    //format.setPreserveSpace(true);
    format.setOmitComments(false);
    //format.setStandalone(true);
    //format.setOmitXMLDeclaration(true);
    //format.setLineSeparator(System.getProperty("line.separator"));
    XMLSerializer serializer = new XMLSerializer(xmlWriter, format);
    //serializer.setNamespaces(true);
    serializer.serialize(dom);
    System.out.println(xmlWriter.toString());
  }
}

This results in:
<?xml version="1.0" encoding="ISO-8859-15"?>
<!DOCTYPE DOC [<!ELEMENT DOC (P)*>
<!ELEMENT B (#PCDATA)>
<!ELEMENT P (#PCDATA|B|P)*>
]>
<DOC>
  <P>no more <B>xx</B> &quot;spaces&quot; </P>
  <P>
    <B>xx</B> &quot;spaces&quot; </P>
  <P>
    <B>xx</B> more spaces </P>
  <P>
    <P>
      <B>xx</B> more spaces </P>
  </P>
  <P>
    <P>
      <B>xx</B> more spaces </P>
  </P>
</DOC>

I also got another case (however it didn't reporduce) where
 aaa<B>bbb</B> &amp; xxx
became 
 aaa<B>bbb</B>&amp; xxx


> unauthorized handling of whitespace when prettyprinting mixed content
> ---------------------------------------------------------------------
>
>                 Key: XERCESJ-766
>                 URL: https://issues.apache.org/jira/browse/XERCESJ-766
>             Project: Xerces2-J
>          Issue Type: Bug
>          Components: Serialization
>    Affects Versions: 2.4.0
>         Environment: Operating System: Linux
> Platform: PC
>            Reporter: Holger Klawitter
>
> The serializer has problems when dealing with mixed content elements:
> Sometimes additional whitespace gets inserted, sometimes whitespace
> gets ignored:
> ```
> public class TestXerces {
>   public static void main( String[] args ) throws Exception {
>     String inputXML =
>       "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"+
>       "<!DOCTYPE DOC [\n"+
>       "<!ELEMENT DOC ( P )* >\n"+
>       "<!ELEMENT B (#PCDATA) >\n"+
>       "<!ELEMENT P (#PCDATA|B|P)* >\n"+
>       "]>\n"+
>       "<DOC>"+
>       "<P>no more <B>xx</B> &quot;spaces&quot; </P>"+
>       "<P><B>xx</B> &quot;spaces&quot; </P>"+
>       "<P><B>xx</B> more spaces </P>"+
>       "<P><P><B>xx</B> more spaces </P></P>"+
>       "<P><P><B>xx</B> more spaces </P> </P>"+
>       "</DOC>";
>     InputSource source = new InputSource(new StringReader(inputXML));
>     DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
>     DOMParser parser = new DOMParser();
>     parser.setFeature("http://xml.org/sax/features/validation", true );
>     parser.parse(source);
>     Document dom = parser.getDocument();
>     StringWriter xmlWriter = new StringWriter();
>     OutputFormat format = new OutputFormat("xml", "ISO-8859-15", false);
>     format.setIndenting(true);
>     format.setIndent( 2 );
>     format.setLineWidth( 65000 );
>     //format.setPreserveSpace(true);
>     format.setOmitComments(false);
>     //format.setStandalone(true);
>     //format.setOmitXMLDeclaration(true);
>     //format.setLineSeparator(System.getProperty("line.separator"));
>     XMLSerializer serializer = new XMLSerializer(xmlWriter, format);
>     //serializer.setNamespaces(true);
>     serializer.serialize(dom);
>     System.out.println(xmlWriter.toString());
>   }
> }
> ```
> This results in:
> <?xml version="1.0" encoding="ISO-8859-15"?>
> <!DOCTYPE DOC [<!ELEMENT DOC (P)*>
> <!ELEMENT B (#PCDATA)>
> <!ELEMENT P (#PCDATA|B|P)*>
> ]>
> <DOC>
>   <P>no more <B>xx</B> &quot;spaces&quot; </P>
>   <P>
>     <B>xx</B> &quot;spaces&quot; </P>
>   <P>
>     <B>xx</B> more spaces </P>
>   <P>
>     <P>
>       <B>xx</B> more spaces </P>
>   </P>
>   <P>
>     <P>
>       <B>xx</B> more spaces </P>
>   </P>
> </DOC>
> I also got another case (however it didn't reproduce) where
>  aaa<B>bbb</B> &amp; xxx
> became 
>  aaa<B>bbb</B>&amp; xxx



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to