Update of /var/cvs/src/org/mmbase/util/transformers In directory james.mmbase.org:/tmp/cvs-serv18402/src/org/mmbase/util/transformers
Modified Files: SpaceReducer.java Log Message: Created fix for issue MMB-1618. You can now easily add tags whose body will be excluded from the blank-line filtering. See also: http://cvs.mmbase.org/viewcvs/src/org/mmbase/util/transformers See also: http://www.mmbase.org/jira/browse/MMB-1618 Index: SpaceReducer.java =================================================================== RCS file: /var/cvs/src/org/mmbase/util/transformers/SpaceReducer.java,v retrieving revision 1.17 retrieving revision 1.18 diff -u -b -r1.17 -r1.18 --- SpaceReducer.java 11 Dec 2007 12:23:34 -0000 1.17 +++ SpaceReducer.java 5 Mar 2008 12:53:59 -0000 1.18 @@ -10,6 +10,10 @@ package org.mmbase.util.transformers; import java.io.*; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.mmbase.util.logging.*; @@ -21,25 +25,56 @@ * Except if they are in between "<pre>" and "</pre>". (Note: perhaps this last behaviour should be made * configurable). * - * @todo 'pre' stuff not yet implemented. - * * @author Michiel Meeuwissen + * @author Ernst Bunders * @since MMBase-1.7 - * @version $Id: SpaceReducer.java,v 1.17 2007/12/11 12:23:34 michiel Exp $ + * @version $Id: SpaceReducer.java,v 1.18 2008/03/05 12:53:59 ernst Exp $ */ public class SpaceReducer extends BufferedReaderTransformer implements CharTransformer { private static Logger log = Logging.getLoggerInstance(SpaceReducer.class); + /** + * This is a list of tags, of who's body the empty lines should not be filtered. 
+ */ + private static List<Tag> tagsToPass = new ArrayList<Tag>(); + static{ + tagsToPass.add(new Tag("pre")); + tagsToPass.add(new Tag("textarea")); + } + protected Tag currentlyOpened = null; protected boolean transform(PrintWriter bw, String line) { - if (!line.trim().equals("")) { +// if (!line.trim().equals("") { +// bw.write(line); +// +// return true; +// } else { +// return false; +// } + boolean result = false; + if(!line.trim().equals("") || currentlyOpened != null){ bw.write(line); - return true; - } else { - return false; + result = true; } + if(currentlyOpened != null){ + //look for a closing tag. + currentlyOpened.setLine(line); + if(currentlyOpened.hasClosed()){ + currentlyOpened = null; + } + }else{ + //look for an opening tag + for (Tag tag : tagsToPass) { + tag.setLine(line); + if(tag.hasOpened()){ + currentlyOpened = tag; + } + } + } + return result; + } /** @@ -88,4 +123,104 @@ public String toString() { return "SPACEREDUCER"; } + + /** + * this is a helper class that can check if a tag was opened or closed in a line of text + * It first removes all bodyless versions of the tag from the line, and then counts all opening and + * closing occurrences of the tag. + * This will not work if an opening or closing tag is partly written on the next line, so it's not perfect. + * <ul> + * <li>have no body + * <li>can be opened and closed multiple times in one line. 
+ * </ul> + * @author ebunders + * + */ + private static class Tag{ + private boolean hasOpened = false; + private boolean hasClosed = false; + private Pattern openingPattern; + private Pattern closingPattern; + private Pattern noBodyPattern; + private String name; + + public Tag(String name){ + openingPattern = Pattern.compile("<[\\s]*"+name+"(\\s+[a-zA-Z]+\\=\"[\\S]+\")*\\s*>", Pattern.CASE_INSENSITIVE); + closingPattern = Pattern.compile("<[\\s]*/\\s*"+name+"\\s*>", Pattern.CASE_INSENSITIVE); + noBodyPattern = Pattern.compile("<[\\s]*"+name+"\\s+([a-zA-Z]+\\=\"[\\S]+\")*\\s*/\\s*>", Pattern.CASE_INSENSITIVE); + this.name=name; + } + + public void setLine(String line){ + //remove the bodyless versions of the tag from this line (if they exist, which they should not) + line = removeTagsWithoutBody(line); + + //count the opening and closing versions of the tag + int opening = countOccurences(openingPattern, line); + int closing = countOccurences(closingPattern, line); + hasOpened = opening > closing; + hasClosed = closing > opening; + } + + private int countOccurences(Pattern pattern, String line) { + Matcher m = pattern.matcher(line); + int counter = 0; + while(m.find() && counter < 5){ + counter ++; + line = line.substring(m.end(), line.length()); + m = pattern.matcher(line); + } + return counter; + } + + /** + * remove all the occurrences of bodyless versions of the tag + * they should not be there, but for safety + * + * @param line + * @return + */ + private String removeTagsWithoutBody(String line) { + Matcher m = noBodyPattern.matcher(line); + while(m.find()){ + line = line.substring(0, m.start()) + line.substring(m.end(), line.length()); + m = noBodyPattern.matcher(line); + } + return line; + } + + public boolean hasOpened(){ + return hasOpened; + } + + public boolean hasClosed(){ + return hasClosed; + } + public String toString() { + return name; + } + } + + /** + * method to test the tag class + * TODO: this should be a unit test + * @param args + */ + 
public static void main (String[] args){ + test("bladie hallo<pre> en nog wat"); + test("bladie hallo<pre> en nog wat<pre>daarna"); + test("bladie hallo<pre> en nog wat< / pre>< pre> <p>jaja</p> <a href=\"nogwat\">jaja</a>"); + test("jaja</pre>"); + test("jaja</pre> <pre> hoera</pre><p>test</p>"); + test("jaja<pre>bla <pre /></pre>filter out bodyless tags"); + System.out.println("FINISED"); + } + + public static void test(String line){ + System.out.println("testing line: "+line); + Tag tag = new Tag("pre"); + tag.setLine(line); + System.out.println("opening: "+tag.hasOpened()+" :: closed: "+tag.hasClosed()); + System.out.println("****************\n"); + } } _______________________________________________ Cvs mailing list Cvs@lists.mmbase.org http://lists.mmbase.org/mailman/listinfo/cvs