rwinston 2004/09/23 05:12:00 Added: net/src/java/org/apache/commons/net/nntp Article.java Threadable.java Threader.java Log: PR: 26282 Added classes to facilitate message threading Revision Changes Path 1.1 jakarta-commons/net/src/java/org/apache/commons/net/nntp/Article.java Index: Article.java =================================================================== /* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2001-2004 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" and * "Apache Commons" must not be used to endorse or promote products * derived from this software without prior written permission. For * written permission, please contact [EMAIL PROTECTED] * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without * prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */ package org.apache.commons.net.nntp; import java.util.ArrayList; import java.util.StringTokenizer; /** * This is a class that contains the basic state needed for message retrieval and threading. * With thanks to Jamie Zawinski <[EMAIL PROTECTED]> * @author rwinston <[EMAIL PROTECTED]> * */ public class Article implements Threadable { private int articleNumber; private String subject; private String date; private String articleId; private String simplifiedSubject; private String from; private StringBuffer header; private StringBuffer references; private boolean isReply = false; public Article kid, next; public Article() { header = new StringBuffer(); } /** * Adds an arbitrary header key and value to this message's header. * @param name the header name * @param val the header value */ public void addHeaderField(String name, String val) { header.append(name); header.append(": "); header.append(val); header.append('\n'); } /** * Adds a message-id to the list of messages that this message references (i.e. replies to) * @param msgId */ public void addReference(String msgId) { if (references == null) { references = new StringBuffer(); references.append("References: "); } references.append(msgId); references.append("\t"); } /** * Returns the MessageId references as an array of Strings * @return an array of message-ids */ public String[] getReferences() { if (references == null) return new String[0]; ArrayList list = new ArrayList(); int terminator = references.indexOf(":"); StringTokenizer st = new StringTokenizer(references.substring(terminator), "\t"); while (st.hasMoreTokens()) { list.add(st.nextToken()); } return (String[]) list.toArray(); } /** * Attempts to parse the subject line for some typical reply signatures, and strip them out * */ private void simplifySubject() { int start = 0; String subject = getSubject(); int len = subject.length(); boolean done = false; while (!done) { done = true; // skip whitespace // "Re: " breaks this while (start < len && subject.charAt(start) == ' ') { start++; } if (start < (len - 2) && (subject.charAt(start) == 'r' || subject.charAt(start) == 'R') && (subject.charAt(start + 1) == 'e' || subject.charAt(start + 1) == 'E')) { if (subject.charAt(start + 2) == ':') { start += 3; // Skip "Re:" isReply = true; done = false; } else if ( start < (len - 2) && (subject.charAt(start + 2) == '[' || subject.charAt(start + 2) == '(')) { int i = start + 3; while (i < len && subject.charAt(i) >= '0' && subject.charAt(i) <= '9') i++; if (i < (len - 1) && (subject.charAt(i) == ']' || subject.charAt(i) == ')') && subject.charAt(i + 1) == ':') { start = i + 2; isReply = true; done = false; } } } if (simplifiedSubject == "(no subject)") simplifiedSubject = ""; int end = len; while (end > start && subject.charAt(end - 1) < ' ') end--; if (start == 0 && end == len) simplifiedSubject = subject; else simplifiedSubject = subject.substring(start, end); } } /** * Recursive method that traverses a pre-threaded graph (or tree) * of connected Article objects and prints them out. * @param article the root of the article 'tree' * @param depth the current tree depth */ public static void printThread(Article article, int depth) { for (int i = 0; i < depth; ++i) System.out.print("==>"); System.out.println(article.getSubject() + "\t" + article.getFrom()); if (article.kid != null) printThread(article.kid, depth + 1); if (article.next != null) printThread(article.next, depth); } public String getArticleId() { return articleId; } public int getArticleNumber() { return articleNumber; } public String getDate() { return date; } public String getFrom() { return from; } public String getSubject() { return subject; } public void setArticleId(String string) { articleId = string; } public void setArticleNumber(int i) { articleNumber = i; } public void setDate(String string) { date = string; } public void setFrom(String string) { from = string; } public void setSubject(String string) { subject = string; } public boolean isDummy() { return (getSubject() == null); } public String messageThreadId() { return articleId; } public String[] messageThreadReferences() { return getReferences(); } public String simplifiedSubject() { if(simplifiedSubject == null) simplifySubject(); return simplifiedSubject; } public boolean subjectIsReply() { if(simplifiedSubject == null) simplifySubject(); return isReply; } public void setChild(Threadable child) { this.kid = (Article) child; flushSubjectCache(); } private void flushSubjectCache() { simplifiedSubject = null; } public void setNext(Threadable next) { this.next = (Article)next; flushSubjectCache(); } public Threadable makeDummy() { return (Threadable)new Article(); } } 1.1 jakarta-commons/net/src/java/org/apache/commons/net/nntp/Threadable.java Index: Threadable.java =================================================================== /* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2001-2004 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" and * "Apache Commons" must not be used to endorse or promote products * derived from this software without prior written permission. For * written permission, please contact [EMAIL PROTECTED] * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without * prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */ package org.apache.commons.net.nntp; /** * A placeholder interface for threadable message objects * Author: Rory Winston <[EMAIL PROTECTED]> * */ public interface Threadable { public boolean isDummy(); public String messageThreadId(); public String[] messageThreadReferences(); public String simplifiedSubject(); public boolean subjectIsReply(); public void setChild(Threadable child); public void setNext(Threadable next); public Threadable makeDummy(); } 1.1 jakarta-commons/net/src/java/org/apache/commons/net/nntp/Threader.java Index: Threader.java =================================================================== /* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2001-2004 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" and * "Apache Commons" must not be used to endorse or promote products * derived from this software without prior written permission. For * written permission, please contact [EMAIL PROTECTED] * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without * prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */ package org.apache.commons.net.nntp; /** * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski. * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details. * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a> * * @author rwinston <[EMAIL PROTECTED]> * */ import java.util.HashMap; import java.util.Iterator; public class Threader { private ThreadContainer root; private HashMap idTable; private int bogusIdCount = 0; /** * The main threader entry point - The client passes in an array of Threadable objects, and * the Threader constructs a connected 'graph' of messages * @param messages * @return */ public Threadable thread(Threadable[] messages) { if (messages == null) return null; idTable = new HashMap(); // walk through each Threadable element for (int i = 0; i < messages.length; ++i) { if (!messages[i].isDummy()) buildContainer(messages[i]); } root = findRootSet(); idTable.clear(); idTable = null; pruneEmptyContainers(root); root.reverseChildren(); gatherSubjects(); if (root.next != null) throw new RuntimeException("root node has a next:" + root); for (ThreadContainer r = root.child; r != null; r = r.next) { if (r.threadable == null) r.threadable = r.child.threadable.makeDummy(); } Threadable result = (root.child == null ? null : root.child.threadable); root.flush(); root = null; return result; } /** * * @param threadable */ private void buildContainer(Threadable threadable) { String id = threadable.messageThreadId(); ThreadContainer container = (ThreadContainer) idTable.get(id); // A ThreadContainer exists for this id already. This should be a forward reference, but may // be a duplicate id, in which case we will need to generate a bogus placeholder id if (container != null) { if (container.threadable != null) { // oops! duplicate ids... id = "<Bogus-id:" + (bogusIdCount++) + ">"; container = null; } else { // The container just contained a forward reference to this message, so let's // fill in the threadable field of the container with this message container.threadable = threadable; } } // No container exists for that message Id. Create one and insert it into the hash table. if (container == null) { container = new ThreadContainer(); container.threadable = threadable; idTable.put(id, container); } // Iterate through all of the references and create ThreadContainers for any references that // don't have them. ThreadContainer parentRef = null; { String[] references = threadable.messageThreadReferences(); for (int i = 0; i < references.length; ++i) { String refString = references[i]; ThreadContainer ref = (ThreadContainer) idTable.get(refString); // if this id doesnt have a container, create one if (ref == null) { ref = new ThreadContainer(); idTable.put(refString, ref); } // Link references together in the order they appear in the References: header, // IF they dont have a have a parent already && // IF it will not cause a circular reference if ((parentRef != null) && (ref.parent == null) && (parentRef != ref) && !(parentRef.findChild(ref))) { // Link ref into the parent's child list ref.parent = parentRef; ref.next = parentRef.child; parentRef.child = ref; } parentRef = ref; } } // parentRef is now set to the container of the last element in the references field. make that // be the parent of this container, unless doing so causes a circular reference if (parentRef != null && (parentRef == container || container.findChild(parentRef))) parentRef = null; // if it has a parent already, its because we saw this message in a References: field, and presumed // a parent based on the other entries in that field. Now that we have the actual message, we can // throw away the old parent and use this new one if (container.parent != null) { ThreadContainer rest, prev; for (prev = null, rest = container.parent.child; rest != null; prev = rest, rest = rest.next) { if (rest == container) break; } if (rest == null) { throw new RuntimeException( "Didnt find " + container + " in parent" + container.parent); } // Unlink this container from the parent's child list if (prev == null) container.parent.child = container.next; else prev.next = container.next; container.next = null; container.parent = null; } // If we have a parent, link container into the parents child list if (parentRef != null) { container.parent = parentRef; container.next = parentRef.child; parentRef.child = container; } } /** * Find the root set of all existing ThreadContainers * @return root the ThreadContainer representing the root node */ private ThreadContainer findRootSet() { ThreadContainer root = new ThreadContainer(); Iterator iter = idTable.keySet().iterator(); while (iter.hasNext()) { Object key = iter.next(); ThreadContainer c = (ThreadContainer) idTable.get(key); if (c.parent == null) { if (c.next != null) throw new RuntimeException( "c.next is " + c.next.toString()); c.next = root.child; root.child = c; } } return root; } /** * Delete any empty or dummy ThreadContainers * @param parent */ private void pruneEmptyContainers(ThreadContainer parent) { ThreadContainer container, prev, next; for (prev = null, container = parent.child, next = container.next; container != null; prev = container, container = next, next = (container == null ? null : container.next)) { // Is it empty and without any children? If so,delete it if (container.threadable == null && container.child == null) { if (prev == null) parent.child = container.next; else prev.next = container.next; // Set container to prev so that prev keeps its same value the next time through the loop container = prev; } // Else if empty, with kids, and (not at root or only one kid) else if ( container.threadable == null && container.child != null && (container.parent != null || container.child.next == null)) { // We have an invalid/expired message with kids. Promote the kids to this level. ThreadContainer tail; ThreadContainer kids = container.child; // Remove this container and replace with 'kids'. if (prev == null) parent.child = kids; else prev.next = kids; // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next // i.e. splice kids into the list in place of container for (tail = kids; tail.next != null; tail = tail.next) tail.parent = container.parent; tail.parent = container.parent; tail.next = container.next; // next currently points to the item after the inserted items in the chain - reset that so we process the newly // promoted items next time round next = kids; // Set container to prev so that prev keeps its same value the next time through the loop container = prev; } else if (container.child != null) { // A real message , with kids // Iterate over the children pruneEmptyContainers(container); } } } /** * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers. */ private void gatherSubjects() { int count = 0; for (ThreadContainer c = root.child; c != null; c = c.next) count++; // TODO verify this will avoid rehashing HashMap subjectTable = new HashMap((int) (count * 1.2), (float) 0.9); count = 0; for (ThreadContainer c = root.child; c != null; c = c.next) { Threadable threadable = c.threadable; // No threadable? If so, it is a dummy node in the root set. // Only root set members may be dummies, and they alway have at least 2 kids // Take the first kid as representative of the subject if (threadable == null) threadable = c.child.threadable; String subj = threadable.simplifiedSubject(); if (subj == null || subj == "") continue; ThreadContainer old = (ThreadContainer) subjectTable.get(subj); // Add this container to the table iff: // - There exists no container with this subject // - or this is a dummy container and the old one is not - the dummy one is // more interesting as a root, so put it in the table instead // - The container in the table has a "Re:" version of this subject, and // this container has a non-"Re:" version of this subject. The non-"Re:" version // is the more interesting of the two. if (old == null || (c.threadable == null && old.threadable != null) || (old.threadable != null && old.threadable.subjectIsReply() && c.threadable != null && !c.threadable.subjectIsReply())) { subjectTable.put(subj, c); count++; } } // If the table is empty, we're done if (count == 0) return; // subjectTable is now populated with one entry for each subject which occurs in the // root set. Iterate over the root set, and gather together the difference. ThreadContainer prev, c, rest; for (prev = null, c = root.child, rest = c.next; c != null; prev = c, c = rest, rest = (rest == null ? null : rest.next)) { Threadable threadable = c.threadable; // is it a dummy node? if (threadable == null) threadable = c.child.threadable; String subj = threadable.simplifiedSubject(); // Dont thread together all subjectless messages if (subj == null || subj == "") continue; ThreadContainer old = (ThreadContainer) subjectTable.get(subj); if (old == c) // That's us continue; // We have now found another container in the root set with the same subject // Remove the "second" message from the root set if (prev == null) root.child = c.next; else prev.next = c.next; c.next = null; if (old.threadable == null && c.threadable == null) { // both dummies - merge them ThreadContainer tail; for (tail = old.child; tail != null && tail.next != null; tail = tail.next); tail.next = c.child; for (tail = c.child; tail != null; tail = tail.next) tail.parent = old; c.child = null; } else if ( old.threadable == null || (c.threadable != null && c.threadable.subjectIsReply() && !old.threadable.subjectIsReply())) { // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old c.parent = old; c.next = old.child; old.child = c; } else { // else make the old and new messages be children of a new dummy container. // We create a new container object for old.msg and empty the old container ThreadContainer newc = new ThreadContainer(); newc.threadable = old.threadable; newc.child = old.child; for (ThreadContainer tail = newc.child; tail != null; tail = tail.next) tail.parent = newc; old.threadable = null; old.child = null; c.parent = old; newc.parent = old; // Old is now a dummy- give it 2 kids , c and newc old.child = c; c.next = newc; } // We've done a merge, so keep the same prev c = prev; } subjectTable.clear(); subjectTable = null; } } /** * A placeholder utility class, used for constructing a tree of Threadables * Originall implementation by Jamie Zawinski. * See the Grendel source for more details <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java#511">here</a> * Threadable objects * @author Rory Winston <[EMAIL PROTECTED]> */ class ThreadContainer { Threadable threadable; ThreadContainer parent; ThreadContainer prev; ThreadContainer next; ThreadContainer child; /** * * @param container * @return true if child is under self's tree. Detects circular references */ boolean findChild(ThreadContainer target) { if (child == null) return false; else if (child == target) return true; else return child.findChild(target); } // Copy the ThreadContainer tree structure down into the underlying Threadable objects // (Make the Threadable tree look like the ThreadContainer tree) void flush() { if (parent != null && threadable == null) throw new RuntimeException("no threadable in " + this.toString()); parent = null; if (threadable != null) threadable.setChild(child == null ? null : child.threadable); if (child != null) { child.flush(); child = null; } if (threadable != null) threadable.setNext(next == null ? null : next.threadable); if (next != null) { next.flush(); next = null; } threadable = null; } /** * Reverse the entire set of children * */ void reverseChildren() { if (child != null) { ThreadContainer kid, prev, rest; for (prev = null, kid = child, rest = kid.next; kid != null; prev = kid, kid = rest, rest = (rest == null ? null : rest.next)) kid.next = prev; child = prev; // Do it for the kids for (kid = child; kid != null; kid = kid.next) kid.reverseChildren(); } } }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]