klease 01/11/21 14:13:36 Modified: src/org/apache/fop/fo FONode.java FOText.java FObj.java FObjMixed.java src/org/apache/fop/fo/flow Block.java Character.java Inline.java src/org/apache/fop/fo/pagination SimplePageMaster.java Added: src/org/apache/fop/fo AbstractCharIterator.java CharClass.java CharIterator.java InlineCharIterator.java OneCharIterator.java RecursiveCharIterator.java Log: Remove extra whitespace during FO tree construction Revision Changes Path 1.24 +27 -1 xml-fop/src/org/apache/fop/fo/FONode.java Index: FONode.java =================================================================== RCS file: /home/cvs/xml-fop/src/org/apache/fop/fo/FONode.java,v retrieving revision 1.23 retrieving revision 1.24 diff -u -r1.23 -r1.24 --- FONode.java 2001/11/09 11:32:37 1.23 +++ FONode.java 2001/11/21 22:13:36 1.24 @@ -1,5 +1,5 @@ /* - * $Id: FONode.java,v 1.23 2001/11/09 11:32:37 keiron Exp $ + * $Id: FONode.java,v 1.24 2001/11/21 22:13:36 klease Exp $ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. * For details on use and redistribution please refer to the * LICENSE file included with these sources. @@ -16,6 +16,9 @@ import org.xml.sax.Attributes; +import java.util.ListIterator; +import java.util.NoSuchElementException; + /** * base class for nodes in the XML tree * @@ -96,6 +99,29 @@ public FONode getParent() { return this.parent; + } + + /** + * Return an iterator over all the children of this FObj. + * @return A ListIterator. + */ + public ListIterator getChildren() { + return null; + } + + /** + * Return an iterator over the object's children starting + * at the passed node. + * @param childNode First node in the iterator + * @return A ListIterator or null if childNode isn't a child of + * this FObj.
+ */ + public ListIterator getChildren(FONode childNode) { + return null; + } + + public CharIterator charIterator() { + return new OneCharIterator(CharClass.CODE_EOT); } } 1.28 +50 -1 xml-fop/src/org/apache/fop/fo/FOText.java Index: FOText.java =================================================================== RCS file: /home/cvs/xml-fop/src/org/apache/fop/fo/FOText.java,v retrieving revision 1.27 retrieving revision 1.28 diff -u -r1.27 -r1.28 --- FOText.java 2001/11/11 22:09:37 1.27 +++ FOText.java 2001/11/21 22:13:36 1.28 @@ -1,5 +1,5 @@ /* - * $Id: FOText.java,v 1.27 2001/11/11 22:09:37 klease Exp $ + * $Id: FOText.java,v 1.28 2001/11/21 22:13:36 klease Exp $ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. * For details on use and redistribution please refer to the * LICENSE file included with these sources." @@ -19,6 +19,8 @@ import org.apache.fop.layoutmgr.LayoutManager; import org.apache.fop.layoutmgr.TextLayoutManager; +import java.util.NoSuchElementException; + /** * a text node in the formatting object tree * @@ -89,7 +91,54 @@ } public LayoutManager getLayoutManager() { + // What if nothing left (length=0)? + if (length < ca.length) { + char[] tmp = ca; + ca = new char[length]; + System.arraycopy(tmp, 0, ca, 0, length); + } return new TextLayoutManager(this, ca, textInfo); + } + + public CharIterator charIterator() { + return new TextCharIterator(); + } + + private class TextCharIterator extends AbstractCharIterator { + int curIndex = 0; + public boolean hasNext() { + return (curIndex < length); + } + + public char nextChar() { + if (curIndex < length) { + // Just a char class? Don't actually care about the value! 
+ return ca[curIndex++]; + } + else throw new NoSuchElementException(); + } + + public void remove() { + if (curIndex>0 && curIndex < length) { + // copy from curIndex to end to curIndex-1 + System.arraycopy(ca, curIndex, ca, curIndex-1, + length-curIndex); + length--; + curIndex--; + } + else if (curIndex == length) { + curIndex = --length; + } + } + + + public void replaceChar(char c) { + if (curIndex>0 && curIndex <= length) { + ca[curIndex-1]=c; + } + } + + + } } 1.27 +20 -2 xml-fop/src/org/apache/fop/fo/FObj.java Index: FObj.java =================================================================== RCS file: /home/cvs/xml-fop/src/org/apache/fop/fo/FObj.java,v retrieving revision 1.26 retrieving revision 1.27 diff -u -r1.26 -r1.27 --- FObj.java 2001/11/14 13:45:44 1.26 +++ FObj.java 2001/11/21 22:13:36 1.27 @@ -1,5 +1,5 @@ /* - * $Id: FObj.java,v 1.26 2001/11/14 13:45:44 keiron Exp $ + * $Id: FObj.java,v 1.27 2001/11/21 22:13:36 klease Exp $ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. * For details on use and redistribution please refer to the * LICENSE file included with these sources. @@ -204,9 +204,27 @@ return null; } - + /** + * Return an iterator over all the children of this FObj. + * @return A ListIterator. + */ public ListIterator getChildren() { return children.listIterator(); + } + + /** + * Return an iterator over the object's children starting + * at the passed node. + * @param childNode First node in the iterator + * @return A ListIterator or null if childNode isn't a child of + * this FObj.
+ */ + public ListIterator getChildren(FONode childNode) { + int i = children.indexOf(childNode); + if (i >= 0) { + return children.listIterator(i); + } + else return null; } public void setIsInTableCell() { 1.18 +8 -1 xml-fop/src/org/apache/fop/fo/FObjMixed.java Index: FObjMixed.java =================================================================== RCS file: /home/cvs/xml-fop/src/org/apache/fop/fo/FObjMixed.java,v retrieving revision 1.17 retrieving revision 1.18 diff -u -r1.17 -r1.18 --- FObjMixed.java 2001/11/14 13:45:44 1.17 +++ FObjMixed.java 2001/11/21 22:13:36 1.18 @@ -1,5 +1,5 @@ /* - * $Id: FObjMixed.java,v 1.17 2001/11/14 13:45:44 keiron Exp $ + * $Id: FObjMixed.java,v 1.18 2001/11/21 22:13:36 klease Exp $ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. * For details on use and redistribution please refer to the * LICENSE file included with these sources. @@ -14,6 +14,7 @@ import org.apache.fop.apps.StreamRenderer; import org.apache.fop.datatypes.ColorType; + /** * base class for representation of mixed content formatting objects * and their processing @@ -92,6 +93,12 @@ } return new Status(Status.OK); } + + public CharIterator charIterator() { + return new RecursiveCharIterator(this); + } + + } 1.1 xml-fop/src/org/apache/fop/fo/AbstractCharIterator.java Index: AbstractCharIterator.java =================================================================== /* * $Id: AbstractCharIterator.java,v 1.1 2001/11/21 22:13:36 klease Exp $ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. * For details on use and redistribution please refer to the * LICENSE file included with these sources. 
*/ package org.apache.fop.fo; // FOP import org.apache.fop.apps.FOPException; import java.util.NoSuchElementException; public abstract class AbstractCharIterator implements CharIterator, Cloneable { abstract public boolean hasNext(); abstract public char nextChar() throws NoSuchElementException ; public Object next() throws NoSuchElementException { return new Character(nextChar()); } public void remove() { throw new UnsupportedOperationException(); } public void replaceChar(char c) { } public Object clone() { try { return super.clone(); } catch (CloneNotSupportedException ex) { return null; } } }; 1.1 xml-fop/src/org/apache/fop/fo/CharClass.java Index: CharClass.java =================================================================== /* * $Id: CharClass.java,v 1.1 2001/11/21 22:13:36 klease Exp $ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. * For details on use and redistribution please refer to the * LICENSE file included with these sources." */ package org.apache.fop.fo; /** * A character class used to distinguish whitespace, LF, and other text. */ public class CharClass { /** Character code used to signal a character boundary in * inline content, such as an inline with borders and padding * or a nested block object. */ public static final char CODE_EOT=0; public static final int UCWHITESPACE=0; // unicode white space public static final int LINEFEED=1; public static final int EOT=2; // Boundary between text runs public static final int NONWHITESPACE=3; public static final int XMLWHITESPACE=4; /** * Return the appropriate CharClass constant for the type * of the passed character. */ public static int classOf(char c) { if (c == CODE_EOT) return EOT; if (c == '\n') return LINEFEED; if ( c==' '|| c == '\r' || c=='\t' ) return XMLWHITESPACE; if (isAnySpace(c)) return UCWHITESPACE; return NONWHITESPACE; } /** * Helper method to determine if the character is a * space with normal behaviour.
Normal behaviour means that * it's not non-breaking */ private static boolean isSpace(char c) { return (c == ' ' || (c >= '\u2000' && c <= '\u200B')); // c == '\u2000' // en quad // c == '\u2001' // em quad // c == '\u2002' // en space // c == '\u2003' // em space // c == '\u2004' // three-per-em space // c == '\u2005' // four-per-em space // c == '\u2006' // six-per-em space // c == '\u2007' // figure space // c == '\u2008' // punctuation space // c == '\u2009' // thin space // c == '\u200A' // hair space // c == '\u200B' // zero width space } /** * Method to determine if the character is a nonbreaking * space. */ private static boolean isNBSP(char c) { return (c == '\u00A0' || // normal no-break space c == '\u202F' || // narrow no-break space c == '\u3000' || // ideographic space c == '\uFEFF') ; // zero width no-break space } /** * @return true if the character represents any kind of space */ private static boolean isAnySpace(char c) { return (isSpace(c) || isNBSP(c)); } } 1.1 xml-fop/src/org/apache/fop/fo/CharIterator.java Index: CharIterator.java =================================================================== /* * $Id: CharIterator.java,v 1.1 2001/11/21 22:13:36 klease Exp $ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. * For details on use and redistribution please refer to the * LICENSE file included with these sources.
*/ package org.apache.fop.fo; import java.util.Iterator; import java.util.NoSuchElementException; public interface CharIterator extends Iterator { char nextChar() throws NoSuchElementException ; void replaceChar(char c); Object clone(); } 1.1 xml-fop/src/org/apache/fop/fo/InlineCharIterator.java Index: InlineCharIterator.java =================================================================== package org.apache.fop.fo; import org.apache.fop.layout.BorderAndPadding; import java.util.Iterator; import java.util.ListIterator; import java.util.NoSuchElementException; public class InlineCharIterator extends RecursiveCharIterator { private boolean bStartBoundary=false; private boolean bEndBoundary=false; public InlineCharIterator(FObj fobj, BorderAndPadding bap) { super(fobj); checkBoundaries(bap); } private void checkBoundaries(BorderAndPadding bap) { // TODO! use start and end in BAP!! bStartBoundary = (bap.getBorderLeftWidth(false)>0 || bap.getPaddingLeft(false)>0); bEndBoundary = (bap.getBorderRightWidth(false)>0 || bap.getPaddingRight(false)>0); } public boolean hasNext() { if (bStartBoundary) return true; return (super.hasNext() || bEndBoundary); /* If super.hasNext() returns false, * we return true if we are going to return a "boundary" signal * else false. */ } public char nextChar() throws NoSuchElementException { if (bStartBoundary) { bStartBoundary=false; return CharClass.CODE_EOT; } try { return super.nextChar(); } catch (NoSuchElementException e) { // Underlying has nothing more to return // Check end boundary char if (bEndBoundary) { bEndBoundary=false; return CharClass.CODE_EOT; } else throw e; } } } 1.1 xml-fop/src/org/apache/fop/fo/OneCharIterator.java Index: OneCharIterator.java =================================================================== /* * $Id: OneCharIterator.java,v 1.1 2001/11/21 22:13:36 klease Exp $ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. 
* For details on use and redistribution please refer to the * LICENSE file included with these sources. */ package org.apache.fop.fo; import java.util.Iterator; import java.util.NoSuchElementException; public class OneCharIterator extends AbstractCharIterator { private boolean bFirst=true; private char charCode; public OneCharIterator(char c) { this.charCode = c; } public boolean hasNext() { return bFirst; } public char nextChar() throws NoSuchElementException { if (bFirst) { bFirst=false; return charCode; } else throw new NoSuchElementException(); } } 1.1 xml-fop/src/org/apache/fop/fo/RecursiveCharIterator.java Index: RecursiveCharIterator.java =================================================================== package org.apache.fop.fo; import java.util.Iterator; import java.util.ListIterator; import java.util.NoSuchElementException; public class RecursiveCharIterator extends AbstractCharIterator { Iterator childIter = null; // Child flow objects CharIterator curCharIter = null; // Children's characters private FONode fobj; private FONode curChild; public RecursiveCharIterator(FObj fobj) { // Set up first child iterator this.fobj = fobj; this.childIter = fobj.getChildren(); getNextCharIter(); } public RecursiveCharIterator(FObj fobj, FONode child) { // Set up first child iterator this.fobj = fobj; this.childIter = fobj.getChildren(child); getNextCharIter(); } public CharIterator mark() { return (CharIterator) this.clone(); } public Object clone() { RecursiveCharIterator ci = (RecursiveCharIterator)super.clone(); ci.childIter = fobj.getChildren(ci.curChild); ci.curCharIter = (CharIterator)curCharIter.clone(); return ci; } public void replaceChar(char c) { if (curCharIter != null) { curCharIter.replaceChar(c); } } private void getNextCharIter() { if (childIter.hasNext()) { this.curChild = (FONode)childIter.next(); this.curCharIter = curChild.charIterator(); } else { curChild = null; curCharIter = null; } } public boolean hasNext() { while (curCharIter != null) { if 
(curCharIter.hasNext()==false) { getNextCharIter(); } else return true; } return false; } public char nextChar() throws NoSuchElementException { if (curCharIter != null) { return curCharIter.nextChar(); } else throw new NoSuchElementException(); } public void remove() { if (curCharIter != null) { curCharIter.remove(); } } } 1.48 +157 -2 xml-fop/src/org/apache/fop/fo/flow/Block.java Index: Block.java =================================================================== RCS file: /home/cvs/xml-fop/src/org/apache/fop/fo/flow/Block.java,v retrieving revision 1.47 retrieving revision 1.48 diff -u -r1.47 -r1.48 --- Block.java 2001/11/14 13:45:44 1.47 +++ Block.java 2001/11/21 22:13:36 1.48 @@ -1,5 +1,5 @@ /* - * $Id: Block.java,v 1.47 2001/11/14 13:45:44 keiron Exp $ + * $Id: Block.java,v 1.48 2001/11/21 22:13:36 klease Exp $ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. * For details on use and redistribution please refer to the * LICENSE file included with these sources. @@ -54,10 +54,19 @@ String id; int span; + private int wsTreatment; //ENUMERATION + private int lfTreatment; //ENUMERATION + private boolean bWScollapse; //true if white-space-collapse=true // this may be helpful on other FOs too boolean anythingLaidOut = false; + /** + * Index of first inline-type FO seen in a sequence. + * Used during FO tree building to do white-space handling. 
+ */ + private FONode firstInlineChild = null; + public Block(FONode parent) { super(parent); } @@ -65,6 +74,10 @@ public void handleAttrs(Attributes attlist) throws FOPException { super.handleAttrs(attlist); this.span = this.properties.get("span").getEnum(); + this.wsTreatment = this.properties.get("white-space-treatment").getEnum(); + this.bWScollapse = (this.properties.get("white-space-collapse").getEnum() == + Constants.TRUE); + this.lfTreatment = this.properties.get("linefeed-treatment").getEnum(); } public Status layout(Area area) throws FOPException { @@ -118,7 +131,7 @@ // this.properties.get("line-height-shift-adjustment"); // this.properties.get("line-stacking-strategy"); // this.properties.get("orphans"); - // this.properties.get("space-treatment"); + // this.properties.get("white-space-treatment"); // this.properties.get("span"); // this.properties.get("text-align"); // this.properties.get("text-align-last"); @@ -359,5 +372,147 @@ public boolean generatesInlineAreas() { return false; + } + + + public void addChild(FONode child) { + // Handle whitespace based on values of properties + // Handle a sequence of inline-producing children in + // one pass + if (((FObj)child).generatesInlineAreas()) { + if (firstInlineChild == null) { + firstInlineChild = child; + } + // lastInlineChild = children.size(); + } + else { + // Handle whitespace in preceding inline areas if any + handleWhiteSpace(); + } + super.addChild(child); + } + + public void end() { + handleWhiteSpace(); + } + + private void handleWhiteSpace() { + log.debug("fo:block: handleWhiteSpace"); + if (firstInlineChild != null) { + boolean bInWS=false; + boolean bPrevWasLF=false; + RecursiveCharIterator charIter = + new RecursiveCharIterator(this, firstInlineChild); + LFchecker lfCheck = new LFchecker(charIter); + + while (charIter.hasNext()) { + switch (CharClass.classOf(charIter.nextChar())) { + case CharClass.XMLWHITESPACE: + /* Some kind of whitespace character, except linefeed.
*/ + boolean bIgnore=false; + + switch (wsTreatment) { + case Constants.IGNORE: + bIgnore=true; + break; + case Constants.IGNORE_IF_BEFORE_LINEFEED: + bIgnore = lfCheck.nextIsLF(); + break; + case Constants.IGNORE_IF_SURROUNDING_LINEFEED: + bIgnore = (bPrevWasLF || lfCheck.nextIsLF()); + break; + case Constants.IGNORE_IF_AFTER_LINEFEED: + bIgnore = bPrevWasLF; + break; + } + // Handle ignore + if (bIgnore) { + charIter.remove(); + } + else if (bWScollapse) { + if (bInWS || (lfTreatment == Constants.PRESERVE && + (bPrevWasLF || lfCheck.nextIsLF()))) { + charIter.remove(); + } + else { + bInWS = true; + } + } + break; + + case CharClass.LINEFEED: + /* A linefeed */ + lfCheck.reset(); + bPrevWasLF=true; // for following whitespace + + switch (lfTreatment) { + case Constants.IGNORE: + charIter.remove(); + break; + case Constants.TREAT_AS_SPACE: + if (bInWS) { + // only if bWScollapse=true + charIter.remove(); + } + else { + if (bWScollapse) bInWS=true; + charIter.replaceChar('\u0020'); + } + break; + case Constants.TREAT_AS_ZERO_WIDTH_SPACE: + charIter.replaceChar('\u200b'); + // Fall through: this isn't XML whitespace + case Constants.PRESERVE: + bInWS=false; + break; + } + break; + + case CharClass.EOT: + // A "boundary" object such as non-character inline + // or nested block object was encountered. + // If any whitespace run in progress, finish it.
+ // FALL THROUGH + + case CharClass.UCWHITESPACE: // Non XML-whitespace + case CharClass.NONWHITESPACE: + /* Any other character */ + bInWS = bPrevWasLF=false; + lfCheck.reset(); + break; + } + } + firstInlineChild = null; + } + } + + private static class LFchecker { + private boolean bNextIsLF=false; + private RecursiveCharIterator charIter; + + LFchecker(RecursiveCharIterator charIter) { + this.charIter = charIter; + } + + boolean nextIsLF() { + if (bNextIsLF==false) { + CharIterator lfIter = charIter.mark(); + while (lfIter.hasNext()) { + char c = lfIter.nextChar(); + if (c == '\n') { + bNextIsLF=true; + break; + } + else if (CharClass.classOf(c)!=CharClass.XMLWHITESPACE) { + break; + } + } + } + return bNextIsLF; + } + + void reset() { + bNextIsLF=false; + } } } 1.15 +10 -2 xml-fop/src/org/apache/fop/fo/flow/Character.java Index: Character.java =================================================================== RCS file: /home/cvs/xml-fop/src/org/apache/fop/fo/flow/Character.java,v retrieving revision 1.14 retrieving revision 1.15 diff -u -r1.14 -r1.15 --- Character.java 2001/11/09 11:32:37 1.14 +++ Character.java 2001/11/21 22:13:36 1.15 @@ -1,5 +1,5 @@ /* - * $Id: Character.java,v 1.14 2001/11/09 11:32:37 keiron Exp $ + * $Id: Character.java,v 1.15 2001/11/21 22:13:36 klease Exp $ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. * For details on use and redistribution please refer to the * LICENSE file included with these sources. 
@@ -37,6 +37,8 @@ public final static int OK = 0; public final static int DOESNOT_FIT = 1; + private char characterValue; + public Character(FONode parent) { super(parent); this.name = "fo:character"; @@ -112,7 +114,7 @@ } // Character specific properties - char characterValue = this.properties.get("character").getCharacter(); + characterValue = this.properties.get("character").getCharacter(); // initialize id @@ -146,5 +148,11 @@ return new Status(Status.OK); } + + public CharIterator charIterator() { + return new OneCharIterator(characterValue); + // But what if the character is ignored due to white space handling? + } + } 1.11 +6 -1 xml-fop/src/org/apache/fop/fo/flow/Inline.java Index: Inline.java =================================================================== RCS file: /home/cvs/xml-fop/src/org/apache/fop/fo/flow/Inline.java,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- Inline.java 2001/11/09 11:32:37 1.10 +++ Inline.java 2001/11/21 22:13:36 1.11 @@ -1,5 +1,5 @@ /* - * $Id: Inline.java,v 1.10 2001/11/09 11:32:37 keiron Exp $ + * $Id: Inline.java,v 1.11 2001/11/21 22:13:36 klease Exp $ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. * For details on use and redistribution please refer to the * LICENSE file included with these sources.
@@ -85,6 +85,11 @@ if (textDecoration == TextDecoration.LINE_THROUGH) { this.lineThrough = true; } + } + + + public CharIterator charIterator() { + return new InlineCharIterator(this, propMgr.getBorderAndPadding()); } } 1.21 +2 -2 xml-fop/src/org/apache/fop/fo/pagination/SimplePageMaster.java Index: SimplePageMaster.java =================================================================== RCS file: /home/cvs/xml-fop/src/org/apache/fop/fo/pagination/SimplePageMaster.java,v retrieving revision 1.20 retrieving revision 1.21 diff -u -r1.20 -r1.21 --- SimplePageMaster.java 2001/11/15 12:40:31 1.20 +++ SimplePageMaster.java 2001/11/21 22:13:36 1.21 @@ -1,5 +1,5 @@ /* - * $Id: SimplePageMaster.java,v 1.20 2001/11/15 12:40:31 keiron Exp $ + * $Id: SimplePageMaster.java,v 1.21 2001/11/21 22:13:36 klease Exp $ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. * For details on use and redistribution please refer to the * LICENSE file included with these sources. @@ -112,7 +112,7 @@ new Rectangle(0,0, pageWidth,pageHeight))); - _regions = null; + // _regions = null; // PageSequence access SimplePageMaster.... children = null; properties = null; }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]