Author: schor
Date: Wed May 23 20:32:08 2012
New Revision: 1342032

URL: http://svn.apache.org/viewvc?rev=1342032&view=rev
Log:
[UIMA-2408] improve indenting when serializing XML

Modified:
    
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/resource/metadata/impl/MetaDataObject_impl.java
    
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLSerializer.java
    
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/resource/metadata/impl/MetaDataObject_implTest.java

Modified: 
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/resource/metadata/impl/MetaDataObject_impl.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/resource/metadata/impl/MetaDataObject_impl.java?rev=1342032&r1=1342031&r2=1342032&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/resource/metadata/impl/MetaDataObject_impl.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/resource/metadata/impl/MetaDataObject_impl.java Wed May 23 20:32:08 2012
@@ -88,6 +88,7 @@ public abstract class MetaDataObject_imp
   static final long serialVersionUID = 5876728533863334480L;
 
   private static String PROP_NAME_SOURCE_URL = "sourceUrl";
+  private static String PROP_NAME_INFOSET = "infoset";
 
   private static final Attributes EMPTY_ATTRIBUTES = new AttributesImpl();
 
@@ -104,6 +105,10 @@ public abstract class MetaDataObject_imp
     this.infoset = infoset;
   }
 
+  public Node getInfoset() {
+    return infoset;
+  }
+
   /**
    * Creates a new <code>MetaDataObject_impl</code> with null attribute values
    */
@@ -127,8 +132,11 @@ public abstract class MetaDataObject_imp
         // only list properties with read and write methods,
         // and don't include the SourceUrl property, which is for
         // internal bookkeeping and shouldn't affect object equality
+        // and don't include infoset, which is for internal bookkeeping
+        // related to comments and whitespace
         if (props[i].getReadMethod() != null && props[i].getWriteMethod() != null
-                && !props[i].getName().equals(PROP_NAME_SOURCE_URL)) {
+                && !props[i].getName().equals(PROP_NAME_SOURCE_URL)
+                && !props[i].getName().equals(PROP_NAME_INFOSET)) {
           String propName = props[i].getName();
           Class propClass = props[i].getPropertyType();
           // translate primitive types (int, boolean, etc.) to wrapper classes
@@ -577,7 +585,7 @@ public abstract class MetaDataObject_imp
   private void toXML(XMLSerializer sax2xml) throws SAXException, IOException {
     ContentHandler contentHandler = sax2xml.getContentHandler();
     contentHandler.startDocument();
-    toXML(sax2xml.getContentHandler(), true);
+    toXML(contentHandler, true);  // no reason to create a new content handler
     contentHandler.endDocument();    
   }
 
@@ -1345,7 +1353,6 @@ public abstract class MetaDataObject_imp
    *       
    * For normal element node, "end":
    *   --> output before element
-   *     if element children:
   *       collect all after last child Element; skip all up to first nl (assume before that, the comment goes with last child node)
    *       if no nl (e.g.   </lastChild> <!--  cmt -->  </elementBeingEnded> )
    *         assume comments go with previous element, and skip here
@@ -1366,6 +1373,11 @@ public abstract class MetaDataObject_imp
   *   Scan from last-outputted, to find element match, and then use that element as the "root".       
    *    
    */
+  private static final char[] blanks = new char[80];
+  static {Arrays.fill(blanks, ' ');}
+  private void outputIndent(int indent, ContentHandler contentHandler) throws SAXException {
+    contentHandler.ignorableWhitespace(blanks, 0, Math.min(80, indent));
+  }
   
   /**
    * CoIw = Comment or IgnorableWhitespace
@@ -1373,7 +1385,19 @@ public abstract class MetaDataObject_imp
    */
   
   private void maybeOutputCoIwBeforeStart(ContentHandler contentHandler, Node node) throws SAXException {
+    int indent = 0;
+    if (contentHandler instanceof CharacterValidatingContentHandler) {
+      indent = ((CharacterValidatingContentHandler) contentHandler).getIndent();;
+    }
+   
     if (null == node) {
+      if (contentHandler instanceof CharacterValidatingContentHandler) {
+        CharacterValidatingContentHandler cvch = (CharacterValidatingContentHandler)contentHandler;
+        if (!cvch.prevNL) { 
+          outputNL(contentHandler);
+          outputIndent(indent, contentHandler);
+        }
+      }
       return;
     }
     if (node.getParentNode() instanceof Document) {
@@ -1402,26 +1426,57 @@ public abstract class MetaDataObject_imp
   }
  
   private void maybeOutputCoIwAfterStart(ContentHandler contentHandler, Node node) throws SAXException {
+    if (contentHandler instanceof CharacterValidatingContentHandler) {
+      ((CharacterValidatingContentHandler)contentHandler).nextIndent();
+    }
     if (null == node || (!hasElementChildNode(node))) {
+      if (contentHandler instanceof CharacterValidatingContentHandler) {
+        ((CharacterValidatingContentHandler)contentHandler).prevNL = false;
+      }
       return;
     }
-    
-    for (Node n = node.getFirstChild(); isCoIw(n); n = n.getNextSibling()) {
-      outputCoIw(contentHandler, n);
-      if (hasNewline(n)) {
-        return;
+
+    outputCoIwAfterElement(node.getFirstChild(), contentHandler);
+  }
+  
+  private void outputCoIwAfterElement(Node startNode, ContentHandler contentHandler) throws DOMException, SAXException {
+    if (null != startNode) {
+      for (Node n = startNode.getFirstChild(); isCoIw(n); n = n.getNextSibling()) {
+        outputCoIw(contentHandler, n);
+        if (hasNewline(n)) {
+          if (contentHandler instanceof CharacterValidatingContentHandler) {
+            ((CharacterValidatingContentHandler) contentHandler).prevNL = true;
+          }
+          return;
+        }
       }
     }
+    if (contentHandler instanceof CharacterValidatingContentHandler) {
+      ((CharacterValidatingContentHandler) contentHandler).prevNL = false;
+    }
   }
   
   private void maybeOutputCoIwBeforeEnd(ContentHandler contentHandler, Node node) throws SAXException {
+    int indent = 0;
+    if (contentHandler instanceof CharacterValidatingContentHandler) {
+      indent = ((CharacterValidatingContentHandler) contentHandler).prevIndent();
+    }
+    
     if (null == node || (!hasElementChildNode(node))) {
+      if (null == node && contentHandler instanceof CharacterValidatingContentHandler) {
+        CharacterValidatingContentHandler cvch = (CharacterValidatingContentHandler)contentHandler;
+        if (cvch.prevWasEndElement) { 
+          outputNL(contentHandler);
+          outputIndent(indent, contentHandler);
+        } 
+      }
       return;
     }
+    
     Node n = node.getLastChild();
     Node np = null;
     boolean newlineFound = false;
-    for (Node p = n; p != null && !(p instanceof Element); p = p.getPreviousSibling()) {
+    for (Node p = n; p != null && !(p instanceof Element) && (p.getNodeType() != Node.ATTRIBUTE_NODE); p = p.getPreviousSibling()) {
       if (hasNewline(p)) {
         newlineFound = true;
       }
@@ -1439,12 +1494,7 @@ public abstract class MetaDataObject_imp
     if (null == node) {
       return;
     }
-    for (Node o = node.getNextSibling(); isCoIw(o); o = o.getNextSibling()) {
-      outputCoIw(contentHandler, o);
-      if (hasNewline(o)) {
-        break;
-      }
-    }
+    outputCoIwAfterElement(node.getNextSibling(), contentHandler);
   }
 
   /**
@@ -1582,8 +1632,23 @@ public abstract class MetaDataObject_imp
   }
   
   private static final char[] nlca = new char[] {'\n'};
+  
   private void outputNL(ContentHandler contentHandler) throws SAXException {
-    contentHandler.characters(nlca, 0, 1); 
+    contentHandler.ignorableWhitespace(nlca, 0, 1); 
+    if (contentHandler instanceof CharacterValidatingContentHandler) {
+      ((CharacterValidatingContentHandler) contentHandler).prevNL = true;
+    }
   }
 
+  // skip forward over any attribute nodes
+//  private Node skipAttrNodes(Node node) {
+//    while (node.getNodeType() == Node.ATTRIBUTE_NODE) {
+//      Node next = node.getNextSibling();
+//      if (next == null) {
+//        return node;  // return last good node if only attribute nodes
+//      }
+//      node = next;
+//    }
+//    return node;
+//  }
 }

Modified: 
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLSerializer.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLSerializer.java?rev=1342032&r1=1342031&r2=1342032&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLSerializer.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLSerializer.java Wed May 23 20:32:08 2012
@@ -163,10 +163,39 @@ public class XMLSerializer {
     }
   }  
   
-  public static class CharacterValidatingContentHandler implements ContentHandler, LexicalHandler {
-    ContentHandler mHandler;
-    boolean mXml11;
+  /**
+   * This class wraps the standard content handler
+   */
+  public class CharacterValidatingContentHandler implements ContentHandler, LexicalHandler {
+    ContentHandler mHandler;  // the wrapped handler
+    boolean mXml11;           
     
+    private int indent = 0;  // tracks indentation for nicely indented output
+    
+    public int getIndent() {
+      return indent;
+    }
+    
+    public int nextIndent() {
+      indent += indentDelta;
+      return indent;
+    }
+
+    public int prevIndent() {
+      indent -= indentDelta;
+      return indent;
+    }
+    
+    private int indentDelta = 0;  // set to positive # to indent each level
+    
+    public int getIndentDelta() {
+      return indentDelta;
+    }
+
+    public void setIndentDelta(int indentDelta) {
+      this.indentDelta = indentDelta;
+    }
+
     private List<Node> mLastOutputNode = new ArrayList<Node>();  // the last output node for repeated subelement nodes 
     
     public void lastOutputNodeAddLevel() {
@@ -185,9 +214,21 @@ public class XMLSerializer {
       mLastOutputNode.remove(mLastOutputNode.size() -1);
     }
     
+    public boolean prevWasEndElement = false;
+    
+    public boolean prevNL = false;
+    
     CharacterValidatingContentHandler(boolean xml11, ContentHandler serializerHandler) {
       mHandler = serializerHandler;  
       mXml11 = xml11;
+      String indentDeltaString = mTransformer.getOutputProperty("{http://xml.apache.org/xslt}indent-amount");
+      if (null != indentDeltaString) {
+        try {
+          indentDelta = Integer.parseInt(indentDeltaString);
+        } catch (NumberFormatException e) {
+          indentDelta = 0;
+        }
+      }
     }
 
     /* (non-Javadoc)
@@ -198,8 +239,8 @@ public class XMLSerializer {
         String val = atts.getValue(i);
         checkForInvalidXmlChars(val, mXml11);
       }
-      mHandler.startElement(uri, localName, qName, atts);
-      
+      mHandler.startElement(uri, localName, qName, atts); 
+      prevWasEndElement = false;
     }
     
     /* (non-Javadoc)
@@ -208,6 +249,8 @@ public class XMLSerializer {
     public void characters(char[] ch, int start, int length) throws SAXException {
       checkForInvalidXmlChars(ch, start, length, mXml11);
       mHandler.characters(ch, start, length);
+//      nlOK = false;  //unfortunately, non validating dom parsers can't detect ignorable whitespace,
+      // so they use characters instead...
     }
 
     /* (non-Javadoc)
@@ -222,6 +265,7 @@ public class XMLSerializer {
      */
     public void endElement(String uri, String localName, String qName) throws SAXException {
       mHandler.endElement(uri, localName, qName);
+      prevWasEndElement = true;
     }
 
     /* (non-Javadoc)
@@ -263,6 +307,7 @@ public class XMLSerializer {
      * @see org.xml.sax.ContentHandler#startDocument()
      */
     public void startDocument() throws SAXException {
+      indent = 0;
       mHandler.startDocument();
     }
 

Modified: 
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/resource/metadata/impl/MetaDataObject_implTest.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/resource/metadata/impl/MetaDataObject_implTest.java?rev=1342032&r1=1342031&r2=1342032&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/resource/metadata/impl/MetaDataObject_implTest.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/resource/metadata/impl/MetaDataObject_implTest.java Wed May 23 20:32:08 2012
@@ -168,7 +168,7 @@ public class MetaDataObject_implTest ext
    * excercise the {@link MetaDataObject#getAttributeValue(String)} and
    * {@link MetaDataObject#setAttributeValue(String,Object)} methods.
    */
-  public void bestXMLization() throws Exception {
+  public void testXMLization() throws Exception {
     try {
       // write objects to XML
 
@@ -234,7 +234,7 @@ public class MetaDataObject_implTest ext
       Assert.assertEquals("banana", fruits[0].getName());
       Assert.assertEquals("raspberry", fruits[1].getName());
 
-      // property name ommitted but can be inferred from type of value
+      // property name omitted but can be inferred from type of value
       xmlStr = "<fruit><name>banana</name><string>yellow</string></fruit>";
       xmlDoc = docBuilder.parse(new ByteArrayInputStream(xmlStr.getBytes()));
       TestFruitObject banana = new TestFruitObject();
@@ -255,7 +255,7 @@ public class MetaDataObject_implTest ext
     }
   }
 
-  public void bestSerialization() throws Exception {
+  public void testSerialization() throws Exception {
     try {
       byte[] apple1Bytes = SerializationUtils.serialize(apple1);
       TestFruitObject apple1a = (TestFruitObject) SerializationUtils.deserialize(apple1Bytes);


Reply via email to