Author: schor Date: Wed May 23 20:32:08 2012 New Revision: 1342032 URL: http://svn.apache.org/viewvc?rev=1342032&view=rev Log: [UIMA-2408] improve indenting when serializing XML
Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/resource/metadata/impl/MetaDataObject_impl.java uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLSerializer.java uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/resource/metadata/impl/MetaDataObject_implTest.java Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/resource/metadata/impl/MetaDataObject_impl.java URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/resource/metadata/impl/MetaDataObject_impl.java?rev=1342032&r1=1342031&r2=1342032&view=diff ============================================================================== --- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/resource/metadata/impl/MetaDataObject_impl.java (original) +++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/resource/metadata/impl/MetaDataObject_impl.java Wed May 23 20:32:08 2012 @@ -88,6 +88,7 @@ public abstract class MetaDataObject_imp static final long serialVersionUID = 5876728533863334480L; private static String PROP_NAME_SOURCE_URL = "sourceUrl"; + private static String PROP_NAME_INFOSET = "infoset"; private static final Attributes EMPTY_ATTRIBUTES = new AttributesImpl(); @@ -104,6 +105,10 @@ public abstract class MetaDataObject_imp this.infoset = infoset; } + public Node getInfoset() { + return infoset; + } + /** * Creates a new <code>MetaDataObject_impl</code> with null attribute values */ @@ -127,8 +132,11 @@ public abstract class MetaDataObject_imp // only list properties with read and write methods, // and don't include the SourceUrl property, which is for // internal bookkeeping and shouldn't affect object equality + // and don't include infoset, which is for internal bookkeeping + // related to comments and whitespace if (props[i].getReadMethod() != null && props[i].getWriteMethod() != null - && !props[i].getName().equals(PROP_NAME_SOURCE_URL)) { + && !props[i].getName().equals(PROP_NAME_SOURCE_URL) + && !props[i].getName().equals(PROP_NAME_INFOSET)) { String propName = props[i].getName(); Class propClass = props[i].getPropertyType(); // translate primitive types (int, boolean, etc.) to wrapper classes @@ -577,7 +585,7 @@ public abstract class MetaDataObject_imp private void toXML(XMLSerializer sax2xml) throws SAXException, IOException { ContentHandler contentHandler = sax2xml.getContentHandler(); contentHandler.startDocument(); - toXML(sax2xml.getContentHandler(), true); + toXML(contentHandler, true); // no reason to create a new content handler contentHandler.endDocument(); } @@ -1345,7 +1353,6 @@ public abstract class MetaDataObject_imp * * For normal element node, "end": * --> output before element - * if element children: * collect all after last child Element; skip all up to first nl (assume before that, the comment goes with last child node) * if no nl (e.g. </lastChild> <!-- cmt --> </elementBeingEnded> ) * assume comments go with previous element, and skip here @@ -1366,6 +1373,11 @@ public abstract class MetaDataObject_imp * Scan from last-outputted, to find element match, and then use that element as the "root". * */ + private static final char[] blanks = new char[80]; + static {Arrays.fill(blanks, ' ');} + private void outputIndent(int indent, ContentHandler contentHandler) throws SAXException { + contentHandler.ignorableWhitespace(blanks, 0, Math.min(80, indent)); + } /** * CoIw = Comment or IgnorableWhitespace @@ -1373,7 +1385,19 @@ public abstract class MetaDataObject_imp */ private void maybeOutputCoIwBeforeStart(ContentHandler contentHandler, Node node) throws SAXException { + int indent = 0; + if (contentHandler instanceof CharacterValidatingContentHandler) { + indent = ((CharacterValidatingContentHandler) contentHandler).getIndent();; + } + if (null == node) { + if (contentHandler instanceof CharacterValidatingContentHandler) { + CharacterValidatingContentHandler cvch = (CharacterValidatingContentHandler)contentHandler; + if (!cvch.prevNL) { + outputNL(contentHandler); + outputIndent(indent, contentHandler); + } + } return; } if (node.getParentNode() instanceof Document) { @@ -1402,26 +1426,57 @@ public abstract class MetaDataObject_imp } private void maybeOutputCoIwAfterStart(ContentHandler contentHandler, Node node) throws SAXException { + if (contentHandler instanceof CharacterValidatingContentHandler) { + ((CharacterValidatingContentHandler)contentHandler).nextIndent(); + } if (null == node || (!hasElementChildNode(node))) { + if (contentHandler instanceof CharacterValidatingContentHandler) { + ((CharacterValidatingContentHandler)contentHandler).prevNL = false; + } return; } - - for (Node n = node.getFirstChild(); isCoIw(n); n = n.getNextSibling()) { - outputCoIw(contentHandler, n); - if (hasNewline(n)) { - return; + + outputCoIwAfterElement(node.getFirstChild(), contentHandler); + } + + private void outputCoIwAfterElement(Node startNode, ContentHandler contentHandler) throws DOMException, SAXException { + if (null != startNode) { + for (Node n = startNode.getFirstChild(); isCoIw(n); n = n.getNextSibling()) { + outputCoIw(contentHandler, n); + if (hasNewline(n)) { + if (contentHandler instanceof CharacterValidatingContentHandler) { + ((CharacterValidatingContentHandler) contentHandler).prevNL = true; + } + return; + } } } + if (contentHandler instanceof CharacterValidatingContentHandler) { + ((CharacterValidatingContentHandler) contentHandler).prevNL = false; + } } private void maybeOutputCoIwBeforeEnd(ContentHandler contentHandler, Node node) throws SAXException { + int indent = 0; + if (contentHandler instanceof CharacterValidatingContentHandler) { + indent = ((CharacterValidatingContentHandler) contentHandler).prevIndent(); + } + if (null == node || (!hasElementChildNode(node))) { + if (null == node && contentHandler instanceof CharacterValidatingContentHandler) { + CharacterValidatingContentHandler cvch = (CharacterValidatingContentHandler)contentHandler; + if (cvch.prevWasEndElement) { + outputNL(contentHandler); + outputIndent(indent, contentHandler); + } + } return; } + Node n = node.getLastChild(); Node np = null; boolean newlineFound = false; - for (Node p = n; p != null && !(p instanceof Element); p = p.getPreviousSibling()) { + for (Node p = n; p != null && !(p instanceof Element) && (p.getNodeType() != Node.ATTRIBUTE_NODE); p = p.getPreviousSibling()) { if (hasNewline(p)) { newlineFound = true; } @@ -1439,12 +1494,7 @@ public abstract class MetaDataObject_imp if (null == node) { return; } - for (Node o = node.getNextSibling(); isCoIw(o); o = o.getNextSibling()) { - outputCoIw(contentHandler, o); - if (hasNewline(o)) { - break; - } - } + outputCoIwAfterElement(node.getNextSibling(), contentHandler); } /** @@ -1582,8 +1632,23 @@ public abstract class MetaDataObject_imp } private static final char[] nlca = new char[] {'\n'}; + private void outputNL(ContentHandler contentHandler) throws SAXException { - contentHandler.characters(nlca, 0, 1); + contentHandler.ignorableWhitespace(nlca, 0, 1); + if (contentHandler instanceof CharacterValidatingContentHandler) { + ((CharacterValidatingContentHandler) contentHandler).prevNL = true; + } } + // skip forward over any attribute nodes +// private Node skipAttrNodes(Node node) { +// while (node.getNodeType() == Node.ATTRIBUTE_NODE) { +// Node next = node.getNextSibling(); +// if (next == null) { +// return node; // return last good node if only attribute nodes +// } +// node = next; +// } +// return node; +// } } Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLSerializer.java URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLSerializer.java?rev=1342032&r1=1342031&r2=1342032&view=diff ============================================================================== --- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLSerializer.java (original) +++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLSerializer.java Wed May 23 20:32:08 2012 @@ -163,10 +163,39 @@ public class XMLSerializer { } } - public static class CharacterValidatingContentHandler implements ContentHandler, LexicalHandler { - ContentHandler mHandler; - boolean mXml11; + /** + * This class wraps the standard content handler + */ + public class CharacterValidatingContentHandler implements ContentHandler, LexicalHandler { + ContentHandler mHandler; // the wrapped handler + boolean mXml11; + private int indent = 0; // tracks indentation for nicely indented output + + public int getIndent() { + return indent; + } + + public int nextIndent() { + indent += indentDelta; + return indent; + } + + public int prevIndent() { + indent -= indentDelta; + return indent; + } + + private int indentDelta = 0; // set to positive # to indent each level + + public int getIndentDelta() { + return indentDelta; + } + + public void setIndentDelta(int indentDelta) { + this.indentDelta = indentDelta; + } + private List<Node> mLastOutputNode = new ArrayList<Node>(); // the last output node for repeated subelement nodes public void lastOutputNodeAddLevel() { @@ -185,9 +214,21 @@ public class XMLSerializer { mLastOutputNode.remove(mLastOutputNode.size() -1); } + public boolean prevWasEndElement = false; + + public boolean prevNL = false; + CharacterValidatingContentHandler(boolean xml11, ContentHandler serializerHandler) { mHandler = serializerHandler; mXml11 = xml11; + String indentDeltaString = mTransformer.getOutputProperty("{http://xml.apache.org/xslt}indent-amount"); + if (null != indentDeltaString) { + try { + indentDelta = Integer.parseInt(indentDeltaString); + } catch (NumberFormatException e) { + indentDelta = 0; + } + } } /* (non-Javadoc) @@ -198,8 +239,8 @@ public class XMLSerializer { String val = atts.getValue(i); checkForInvalidXmlChars(val, mXml11); } - mHandler.startElement(uri, localName, qName, atts); - + mHandler.startElement(uri, localName, qName, atts); + prevWasEndElement = false; } /* (non-Javadoc) @@ -208,6 +249,8 @@ public class XMLSerializer { public void characters(char[] ch, int start, int length) throws SAXException { checkForInvalidXmlChars(ch, start, length, mXml11); mHandler.characters(ch, start, length); +// nlOK = false; //unfortunately, non validating dom parsers can't detect ignorable whitespace, + // so they use characters instead... } /* (non-Javadoc) @@ -222,6 +265,7 @@ public class XMLSerializer { */ public void endElement(String uri, String localName, String qName) throws SAXException { mHandler.endElement(uri, localName, qName); + prevWasEndElement = true; } /* (non-Javadoc) @@ -263,6 +307,7 @@ public class XMLSerializer { * @see org.xml.sax.ContentHandler#startDocument() */ public void startDocument() throws SAXException { + indent = 0; mHandler.startDocument(); } Modified: uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/resource/metadata/impl/MetaDataObject_implTest.java URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/resource/metadata/impl/MetaDataObject_implTest.java?rev=1342032&r1=1342031&r2=1342032&view=diff ============================================================================== --- uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/resource/metadata/impl/MetaDataObject_implTest.java (original) +++ uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/resource/metadata/impl/MetaDataObject_implTest.java Wed May 23 20:32:08 2012 @@ -168,7 +168,7 @@ public class MetaDataObject_implTest ext * excercise the {@link MetaDataObject#getAttributeValue(String)} and * {@link MetaDataObject#setAttributeValue(String,Object)} methods. */ - public void bestXMLization() throws Exception { + public void testXMLization() throws Exception { try { // write objects to XML @@ -234,7 +234,7 @@ public class MetaDataObject_implTest ext Assert.assertEquals("banana", fruits[0].getName()); Assert.assertEquals("raspberry", fruits[1].getName()); - // property name ommitted but can be inferred from type of value + // property name omitted but can be inferred from type of value xmlStr = "<fruit><name>banana</name><string>yellow</string></fruit>"; xmlDoc = docBuilder.parse(new ByteArrayInputStream(xmlStr.getBytes())); TestFruitObject banana = new TestFruitObject(); @@ -255,7 +255,7 @@ public class MetaDataObject_implTest ext } } - public void bestSerialization() throws Exception { + public void testSerialization() throws Exception { try { byte[] apple1Bytes = SerializationUtils.serialize(apple1); TestFruitObject apple1a = (TestFruitObject) SerializationUtils.deserialize(apple1Bytes);