Author: burton
Date: Fri Jan 28 15:39:37 2005
New Revision: 148951

URL: http://svn.apache.org/viewcvs?view=rev&rev=148951
Log:
patch from Joseph Ottinger to rework the FeedParser as an interface
Modified:
   
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParser.java
   
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserFactory.java
   
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/test/TestFeedFilter.java

Modified: 
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParser.java
Url: 
http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParser.java?view=diff&rev=148951&p1=jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParser.java&r1=148950&p2=jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParser.java&r2=148951
==============================================================================
--- jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParser.java	(original)
+++ jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParser.java	Fri Jan 28 15:39:37 2005
@@ -16,23 +16,7 @@
 
 package org.apache.commons.feedparser;
 
-import org.apache.commons.feedparser.tools.*;
-
-import java.io.*;
-import java.net.*;
-import java.util.*;
-
-import org.jdom.Element;
-import org.jdom.Attribute;
-import org.jdom.CDATA;
-import org.jdom.Text;
-import org.jdom.Comment;
-import org.jdom.output.*;
-import org.jdom.input.*;
-
-import org.jaxen.jdom.*;
-
-import org.apache.log4j.Logger;
+import java.io.InputStream;
 
 /**
  * This FeedParser implementation is based on JDOM and Jaxen and is based around
@@ -43,9 +27,7 @@
  * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton (burtonator)</a>
  * @version $Id$
  */
-public class FeedParser {
-
-    private static Logger log = Logger.getLogger( FeedParser.class );
+public interface FeedParser {
     
     /**
      * Parse this feed.
@@ -54,165 +36,23 @@
      * may be null but is used when an exception is thrown to aid debugging.
      *
      */
-    public static void parse( FeedParserListener listener,
-                              InputStream is ,
-                              String resource ) throws FeedParserException {
-
-        try { 
-
-            // Need to massage our XML support for UTF-8 to prevent the dreaded
-            // "Invalid byte 1 of 1-byte UTF-8 sequence" content bug in some
-            // default feeds.  This was tested a great deal under NewsMonster
-            // and I'm happy with the results.  Within FeedParser 2.0 we will be
-            // using SAX2 so this won't be as big of a problem.  In FeedParser
-            // 2.0 (or as soon as we use SAX) this code should be totally
-            // removed to use the original stream.
-
-            is = getCorrectInputStream( is );
-
-            //OK.  Now we have the right InputStream so we should build our DOM
-            //and exec.
-            DOMBuilder builder = new DOMBuilder();
-
-            org.jdom.Document doc = builder.build( is );
-
-            parse( listener, doc );
-
-        } catch ( FeedParserException fpe ) {
-            //if an explicit FeedParserException is thrown just rethrow it..
-            throw fpe;
-        } catch ( Throwable t ) {
-
-            //FIXME: when this is a JDOM or XML parser Exception we should
-            //detect when we're working with an XHTML or HTML file and then
-            //parse it with an XFN/XOXO event listener.
-            
-            throw new FeedParserException( t, resource );
-        }
-
-    }
+    public void parse( FeedParserListener listener,
+                       InputStream is ,
+                       String resource ) throws FeedParserException;
+    
 
     /**
-     * Perform the Xerces UTF8 correction and FeedFilter.
-     *
-     * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton</a>
+     * @deprecated Use #parse( FeedParserListener, InputStream, String )
      */
-    private static InputStream getCorrectInputStream( InputStream is )
-        throws Exception {
-
-        byte[] bytes = toByteArray( is );
-
-        //FIXME: if we return the WRONG content type here we will break.
-        //getBytes()... UTF-16 and UTF-32 especially.  We should also perform
-        //HTTP Content-Type parsing here to preserve the content type.  This can
-        //be fixed by integrating our networking API from NewsMonster.
-        
-        String encoding = XMLEncodingParser.parse( bytes );
-
-        if ( encoding == null )
-            encoding = "UTF-8";
-
-        if ( encoding.startsWith( "UTF" ) ) {
-                
-            String result = XMLCleanser.cleanse( bytes, encoding );
-            bytes = FeedFilter.parse( result, encoding );
-
-        } else {
-
-            bytes = FeedFilter.parse( bytes, encoding );
-             
-        }
-
-        //remove prefix whitespace, intern HTML entities, etc.
-
-        //build an input stream from the our bytes for parsing...
-        is = new ByteArrayInputStream( bytes );
-
-        return is;
-        
-    }
+    public void parse( FeedParserListener listener,
+                       InputStream is ) throws FeedParserException;
 
     /**
      * Parse this feed.
      *
-     * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton</a>
      */
-    public static void parse( FeedParserListener listener,
-                              org.jdom.Document doc ) throws FeedParserException {
-
-        try {
-
-            String root = doc.getRootElement().getName();
-            
-            //Handle OPML
-            if ( "opml".equals( root ) ) {
-                OPMLFeedParser.parse( listener, doc );
-                return;
-            }
-
-            //Handle changes.xml
-            if ( "weblogUpdates".equals( root ) ) {
-                ChangesFeedParser.parse( listener, doc );
-                return;
-            }
-
-            //Handle ATOM
-            if ( "feed".equals( root ) ) {
-                AtomFeedParser.parse( listener, doc );
-                return;
-            }
-
-            //Handle FOAF
-            if ( doc.getRootElement().getChildren( "Person", NS.FOAF ).size() > 0 ) {
-                FOAFFeedParser.parse( listener, doc );
-                return;
-            }
-
-            //FIXME: if this is XHTML we need to handle this with either an XFN
-            //or an XOXO directory parser.  There might be more metadata we need
-            //to parse here.  (also I wonder if this could be a chance to do
-            //autodiscovery).
-            
-            //fall back on RDF and RSS parsing.
-
-            //FIXME: if this is an UNKNOWN format We need to throw an
-            //UnsupportedFeedxception (which extends FeedParserException)
-            //
-            // In this situation the ROOT elements should be: rss or RDF
-            
-            RSSFeedParser.parse( listener, doc );
-
-        } catch ( FeedParserException fpe ) {
-            //if an explicit FeedParserException is thrown just rethrow it..
-            throw fpe;
-        } catch ( Throwable t ) { throw new FeedParserException( t ); }
-
-    }
-
-    /**
-     * Convert an InputStream to a byte array.
-     */
-    public static byte[] toByteArray( InputStream is ) throws IOException {
-
-        //WARNING: 
-        ByteArrayOutputStream bos = new ByteArrayOutputStream();
-      
-        //now process the Reader...
-        byte data[] = new byte[200];
-    
-        int readCount = 0;
-
-        while( ( readCount = is.read( data )) > 0 ) {
-            
-            bos.write( data, 0, readCount );
-        }
-
-        is.close();
-        bos.close();
-
-        return bos.toByteArray();
-
-    }
+    public void parse( FeedParserListener listener,
+                       org.jdom.Document doc ) throws FeedParserException;
 
 }
 

Modified: 
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserFactory.java
Url: 
http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserFactory.java?view=diff&rev=148951&p1=jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserFactory.java&r1=148950&p2=jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserFactory.java&r2=148951
==============================================================================
--- jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserFactory.java	(original)
+++ jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserFactory.java	Fri Jan 28 15:39:37 2005
@@ -16,27 +16,13 @@
 
 package org.apache.commons.feedparser;
 
-import java.io.*;
-import java.net.*;
-import java.util.*;
-
-import org.jdom.Element;
-import org.jdom.Attribute;
-import org.jdom.CDATA;
-import org.jdom.Text;
-import org.jdom.Comment;
-import org.jdom.output.*;
-import org.jdom.input.*;
-
-import org.jaxen.jdom.*;
-
 /**
  * Should be called prior to all use of a FeedParser.  We reserve the right to
  * change the underlying implementation of the FeedParser in the future for
  * performance reasons.
  * 
  * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton (burtonator)</a>
- * @version $Id: FeedParserFactory.java,v 1.2 2004/02/28 03:35:22 bayard Exp $
+ * @version $Id$
  */
 public class FeedParserFactory {
 
@@ -47,7 +33,7 @@
         
         try { 
             
-            return (FeedParser)FeedParser.class.newInstance();
+            return (FeedParser)new FeedParserImpl();
             
         } catch ( Throwable t ) { throw new FeedParserException( t ); }
     }

Modified: 
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/test/TestFeedFilter.java
Url: 
http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/test/TestFeedFilter.java?view=diff&rev=148951&p1=jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/test/TestFeedFilter.java&r1=148950&p2=jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/test/TestFeedFilter.java&r2=148951
==============================================================================
--- jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/test/TestFeedFilter.java	(original)
+++ jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/test/TestFeedFilter.java	Fri Jan 28 15:39:37 2005
@@ -16,25 +16,19 @@
 
 package org.apache.commons.feedparser.test;
 
-import java.applet.*;
-import java.util.*;
-import java.io.*;
-import java.net.*;
-import java.security.*;
-
-import org.peerfear.newsmonster.tools.*;
-import org.peerfear.newsmonster.network.*;
-
-import junit.framework.*;
-
-import org.apache.commons.feedparser.*;
-import org.apache.commons.feedparser.impl.*;
-import org.apache.commons.feedparser.locate.*;
+import junit.framework.TestCase;
+import org.apache.commons.feedparser.FeedParser;
+import org.apache.commons.feedparser.FeedParserFactory;
+import org.apache.commons.feedparser.impl.DebugFeedParserListener;
+
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import java.net.URL;
 
 /**
  *
  * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton</a>
- * @version $Id: TestFeedFilter.java,v 1.6 2005/01/19 06:01:41 burton Exp $
+ * @version $Id$
  */
 public class TestFeedFilter extends TestCase {
     protected String feedparserHome;
@@ -67,7 +61,8 @@
 
         DebugFeedParserListener listener = new DebugFeedParserListener( out );
 
-        FeedParser.parse( listener, url.openStream(), resource );
+        FeedParser parser=FeedParserFactory.newFeedParser();
+        parser.parse( listener, url.openStream(), resource );
 
         out.println( "</pre>" );
 

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to