Author: burton
Date: Fri Jan 28 15:39:37 2005
New Revision: 148951

URL: http://svn.apache.org/viewcvs?view=rev&rev=148951
Log:
patch from Joseph Ottinger to rework the FeedParser as an interface

Modified:
    jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParser.java
    jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserFactory.java
    jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/test/TestFeedFilter.java

Modified: jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParser.java Url: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParser.java?view=diff&rev=148951&p1=jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParser.java&r1=148950&p2=jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParser.java&r2=148951 ============================================================================== --- jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParser.java (original) +++ jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParser.java Fri Jan 28 15:39:37 2005 @@ -16,23 +16,7 @@ package org.apache.commons.feedparser; -import org.apache.commons.feedparser.tools.*; - -import java.io.*; -import java.net.*; -import java.util.*; - -import org.jdom.Element; -import org.jdom.Attribute; -import org.jdom.CDATA; -import org.jdom.Text; -import org.jdom.Comment; -import org.jdom.output.*; -import org.jdom.input.*; - -import org.jaxen.jdom.*; - -import org.apache.log4j.Logger; +import java.io.InputStream; /** * This FeedParser implementation is based on JDOM and Jaxen and is based around @@ -43,9 +27,7 @@ * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton (burtonator)</a> * @version $Id$ */ -public class FeedParser { - - private static Logger log = Logger.getLogger( FeedParser.class ); +public interface FeedParser { /** * Parse this feed. @@ -54,165 +36,23 @@ * may be null but is used when an exception is thrown to aid debugging. * */ - public static void parse( FeedParserListener listener, - InputStream is , - String resource ) throws FeedParserException { - - try { - - // Need to massage our XML support for UTF-8 to prevent the dreaded - // "Invalid byte 1 of 1-byte UTF-8 sequence" content bug in some - // default feeds. 
This was tested a great deal under NewsMonster - // and I'm happy with the results. Within FeedParser 2.0 we will be - // using SAX2 so this won't be as big of a problem. In FeedParser - // 2.0 (or as soon as we use SAX) this code should be totally - // removed to use the original stream. - - is = getCorrectInputStream( is ); - - //OK. Now we have the right InputStream so we should build our DOM - //and exec. - DOMBuilder builder = new DOMBuilder(); - - org.jdom.Document doc = builder.build( is ); - - parse( listener, doc ); - - } catch ( FeedParserException fpe ) { - //if an explicit FeedParserException is thrown just rethrow it.. - throw fpe; - } catch ( Throwable t ) { - - //FIXME: when this is a JDOM or XML parser Exception we should - //detect when we're working with an XHTML or HTML file and then - //parse it with an XFN/XOXO event listener. - - throw new FeedParserException( t, resource ); - } - - } + public void parse( FeedParserListener listener, + InputStream is , + String resource ) throws FeedParserException; + /** - * Perform the Xerces UTF8 correction and FeedFilter. - * - * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton</a> + * @deprecated Use #parse( FeedParserException, InputStream, String ) */ - private static InputStream getCorrectInputStream( InputStream is ) - throws Exception { - - byte[] bytes = toByteArray( is ); - - //FIXME: if we return the WRONG content type here we will break. - //getBytes()... UTF-16 and UTF-32 especially. We should also perform - //HTTP Content-Type parsing here to preserve the content type. This can - //be fixed by integrating our networking API from NewsMonster. 
- - String encoding = XMLEncodingParser.parse( bytes ); - - if ( encoding == null ) - encoding = "UTF-8"; - - if ( encoding.startsWith( "UTF" ) ) { - - String result = XMLCleanser.cleanse( bytes, encoding ); - bytes = FeedFilter.parse( result, encoding ); - - } else { - - bytes = FeedFilter.parse( bytes, encoding ); - - } - - //remove prefix whitespace, intern HTML entities, etc. - - //build an input stream from the our bytes for parsing... - is = new ByteArrayInputStream( bytes ); - - return is; - - } + public void parse( FeedParserListener listener, + InputStream is ) throws FeedParserException; /** * Parse this feed. * - * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton</a> */ - public static void parse( FeedParserListener listener, - org.jdom.Document doc ) throws FeedParserException { - - try { - - String root = doc.getRootElement().getName(); - - //Handle OPML - if ( "opml".equals( root ) ) { - OPMLFeedParser.parse( listener, doc ); - return; - } - - //Handle changes.xml - if ( "weblogUpdates".equals( root ) ) { - ChangesFeedParser.parse( listener, doc ); - return; - } - - //Handle ATOM - if ( "feed".equals( root ) ) { - AtomFeedParser.parse( listener, doc ); - return; - } - - //Handle FOAF - if ( doc.getRootElement().getChildren( "Person", NS.FOAF ).size() > 0 ) { - FOAFFeedParser.parse( listener, doc ); - return; - } - - //FIXME: if this is XHTML we need to handle this with either an XFN - //or an XOXO directory parser. There might be more metadata we need - //to parse here. (also I wonder if this could be a chance to do - //autodiscovery). - - //fall back on RDF and RSS parsing. - - //FIXME: if this is an UNKNOWN format We need to throw an - //UnsupportedFeedxception (which extends FeedParserException) - // - // In this situation the ROOT elements should be: rss or RDF - - RSSFeedParser.parse( listener, doc ); - - } catch ( FeedParserException fpe ) { - //if an explicit FeedParserException is thrown just rethrow it.. 
- throw fpe; - } catch ( Throwable t ) { throw new FeedParserException( t ); } - - } - - /** - * Convert an InputStream to a byte array. - */ - public static byte[] toByteArray( InputStream is ) throws IOException { - - //WARNING: - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - - //now process the Reader... - byte data[] = new byte[200]; - - int readCount = 0; - - while( ( readCount = is.read( data )) > 0 ) { - - bos.write( data, 0, readCount ); - } - - is.close(); - bos.close(); - - return bos.toByteArray(); - - } + public void parse( FeedParserListener listener, + org.jdom.Document doc ) throws FeedParserException; } Modified: jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserFactory.java Url: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserFactory.java?view=diff&rev=148951&p1=jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserFactory.java&r1=148950&p2=jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserFactory.java&r2=148951 ============================================================================== --- jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserFactory.java (original) +++ jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserFactory.java Fri Jan 28 15:39:37 2005 @@ -16,27 +16,13 @@ package org.apache.commons.feedparser; -import java.io.*; -import java.net.*; -import java.util.*; - -import org.jdom.Element; -import org.jdom.Attribute; -import org.jdom.CDATA; -import org.jdom.Text; -import org.jdom.Comment; -import org.jdom.output.*; -import org.jdom.input.*; - -import org.jaxen.jdom.*; - /** * Should be called prior to all use of a FeedParser. We reserve the right to * change the underlying implementation of the FeedParser in the future for * performance reasons. 
* * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton (burtonator)</a> - * @version $Id: FeedParserFactory.java,v 1.2 2004/02/28 03:35:22 bayard Exp $ + * @version $Id$ */ public class FeedParserFactory { @@ -47,7 +33,7 @@ try { - return (FeedParser)FeedParser.class.newInstance(); + return (FeedParser)new FeedParserImpl(); } catch ( Throwable t ) { throw new FeedParserException( t ); } } Modified: jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/test/TestFeedFilter.java Url: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/test/TestFeedFilter.java?view=diff&rev=148951&p1=jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/test/TestFeedFilter.java&r1=148950&p2=jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/test/TestFeedFilter.java&r2=148951 ============================================================================== --- jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/test/TestFeedFilter.java (original) +++ jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/test/TestFeedFilter.java Fri Jan 28 15:39:37 2005 @@ -16,25 +16,19 @@ package org.apache.commons.feedparser.test; -import java.applet.*; -import java.util.*; -import java.io.*; -import java.net.*; -import java.security.*; - -import org.peerfear.newsmonster.tools.*; -import org.peerfear.newsmonster.network.*; - -import junit.framework.*; - -import org.apache.commons.feedparser.*; -import org.apache.commons.feedparser.impl.*; -import org.apache.commons.feedparser.locate.*; +import junit.framework.TestCase; +import org.apache.commons.feedparser.FeedParser; +import org.apache.commons.feedparser.FeedParserFactory; +import org.apache.commons.feedparser.impl.DebugFeedParserListener; + +import java.io.FileOutputStream; +import java.io.PrintStream; +import java.net.URL; /** * * @author <a 
href="mailto:[EMAIL PROTECTED]">Kevin A. Burton</a> - * @version $Id: TestFeedFilter.java,v 1.6 2005/01/19 06:01:41 burton Exp $ + * @version $Id$ */ public class TestFeedFilter extends TestCase { protected String feedparserHome; @@ -67,7 +61,8 @@ DebugFeedParserListener listener = new DebugFeedParserListener( out ); - FeedParser.parse( listener, url.openStream(), resource ); + FeedParser parser=FeedParserFactory.newFeedParser(); + parser.parse( listener, url.openStream(), resource ); out.println( "</pre>" ); --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]