Author: burton Date: Mon Jan 31 13:29:04 2005 New Revision: 149303 URL: http://svn.apache.org/viewcvs?view=rev&rev=149303 Log: brads email added
Modified: jakarta/commons/sandbox/feedparser/trunk/TODO jakarta/commons/sandbox/feedparser/trunk/project.properties jakarta/commons/sandbox/feedparser/trunk/project.xml jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/RSSFeedParser.java jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/example/HelloFeedParser.java jakarta/commons/sandbox/feedparser/trunk/xdocs/index.xml Modified: jakarta/commons/sandbox/feedparser/trunk/TODO URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/TODO?view=diff&r1=149302&r2=149303 ============================================================================== --- jakarta/commons/sandbox/feedparser/trunk/TODO (original) +++ jakarta/commons/sandbox/feedparser/trunk/TODO Mon Jan 31 13:29:04 2005 @@ -55,7 +55,10 @@ - Do we support multiple content items in Atom? -- Where do we store global options like USER_AGENT, STRICT_PARSING, and STRICT_SPEC +- Where do we store global options like USER_AGENT, STRICT_PARSING, and + STRICT_SPECw + + - Unit tests don't need to be so LOUD!! @@ -66,6 +69,16 @@ - Atom content - Atom summary - xml:base expansion + + - http://www.intertwingly.net/stories/2004/04/04/title.html + + +- http://feedparser.org/docs/html-sanitization.html#advanced.sanitization.why + + Hm... + + +- FIXME: documentation on using HTTP authentication - BUG: Modified: jakarta/commons/sandbox/feedparser/trunk/project.properties URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/project.properties?view=diff&r1=149302&r2=149303 ============================================================================== --- jakarta/commons/sandbox/feedparser/trunk/project.properties (original) +++ jakarta/commons/sandbox/feedparser/trunk/project.properties Mon Jan 31 13:29:04 2005 @@ -25,3 +25,5 @@ org.xml.sax.driver=org.apache.xerces.parsers.SAXParser clover.excludes=**/Test*.java + +maven.username=burton \ No newline at end of file Modified: jakarta/commons/sandbox/feedparser/trunk/project.xml URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/project.xml?view=diff&r1=149302&r2=149303 ============================================================================== --- jakarta/commons/sandbox/feedparser/trunk/project.xml (original) +++ jakarta/commons/sandbox/feedparser/trunk/project.xml Mon Jan 31 13:29:04 2005 @@ -42,7 +42,7 @@ <name>Brad Neuberg</name> <id></id> - <email></email> + <email>[EMAIL PROTECTED]</email> <organization>Rojo Networks Inc.</organization> </developer> Modified: jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/RSSFeedParser.java URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/RSSFeedParser.java?view=diff&r1=149302&r2=149303 ============================================================================== --- jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/RSSFeedParser.java (original) +++ jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/RSSFeedParser.java Mon Jan 31 13:29:04 2005 @@ -51,7 +51,10 @@ FeedParserState state = new FeedParserState( listener ); FeedVersion v = new FeedVersion(); + v.isRSS = true; + v.version = doc.getRootElement().getAttributeValue( "version" ); + listener.onFeedVersion( v ); listener.init(); Modified: jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/example/HelloFeedParser.java URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/example/HelloFeedParser.java?view=diff&r1=149302&r2=149303 ============================================================================== --- jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/example/HelloFeedParser.java (original) +++ jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/example/HelloFeedParser.java Mon Jan 31 13:29:04 2005 @@ -18,6 +18,7 @@ import org.apache.commons.feedparser.*; import org.apache.commons.feedparser.impl.*; +import org.apache.commons.feedparser.network.*; import java.io.*; import java.net.*; @@ -33,8 +34,10 @@ public static void main( String[] args ) throws Exception { + //create a new FeedParser... FeedParser parser = FeedParserFactory.newFeedParser(); + //create a listener for handling our callbacks FeedParserListener listener = new DefaultFeedParserListener() { public void onItem( FeedParserState state, @@ -43,19 +46,23 @@ String description, String permalink ) throws FeedParserException { - System.out.println( "Found a new published article: " + permalink ); } - }; + //specify the feed we want to fetch String resource = "http://peerfear.org/rss/index.rss"; - - InputStream is = new URL( resource ).openStream(); - - parser.parse( listener, is, resource ); + + //use the FeedParser network IO package to fetch our resource URL + ResourceRequest request = ResourceRequestFactory.getResourceRequest( resource ); + + //grab our input stream + InputStream is = request.getInputStream(); + + //start parsing our feed and have the above onItem methods called + parser.parse( listener, is, resource ); } Modified: jakarta/commons/sandbox/feedparser/trunk/xdocs/index.xml URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/xdocs/index.xml?view=diff&r1=149302&r2=149303 ============================================================================== --- jakarta/commons/sandbox/feedparser/trunk/xdocs/index.xml (original) +++ jakarta/commons/sandbox/feedparser/trunk/xdocs/index.xml Mon Jan 31 13:29:04 2005 @@ -18,7 +18,8 @@ </p> <p> - FeedParser was the parser API designed from <a + FeedParser was the parser API designed by <a + href="http://peerfear.org">Kevin Burton</a> for <a href="http://newsmonster.org">NewsMonster</a> and has been donated to the ASF in order to continue development. </p> @@ -26,30 +27,33 @@ <p> FeedParser differs from most other RSS/Atom parsers in that it is not DOM based but event based (similar to SAX). Instead of - the low level startElement() API present in SAX, we provide high - level events based on RSS parsing information. + the low level startElement() API present in SAX, we provide + higher level events based on feed parsing information. </p> <p> Events are also given to the caller independent of the - underlying format. This is accomplished by a Feed Event Model + underlying format. This is accomplished with a Feed Event Model that isolates your application from the underlying feed format. - This enables your applications to transparently support all RSS - versions and Atom. We also hide format specific implementation - such as dates (RFC 822 in RSS 2.0 and 0.9x and ISO 8601 in RSS - 1.0 and Atom). + This enables transparent support for all RSS versions including + Atom. We also hide format specific implementation such as dates + (RFC 822 in RSS 2.0 and 0.9x and ISO 8601 in RSS 1.0 and Atom) + and other metadata. </p> <p> The FeedParser distribution also includes: - </p> <ol> <li>An implementation of RSS and Atom autodiscovery.</li> - <li>Support for all content modules including xhtml:body, - mod_content (RDF and inline), and atom:content </li> + <li> + + Support for all content modules including xhtml:body, + mod_content (RDF and inline), atom:content, and atom:summary + + </li> <li> Atom 1.0 link API as well as RSS 1.0 mod_link API @@ -57,7 +61,8 @@ <li> An HTML link parser for finding all links in an HTML source - file and expanding them to become full + file and expanding them to become full URLs instead of + relative. </li> </ol> @@ -75,8 +80,27 @@ would otherwise fail. </p> + <p> + Feed location within FeedParser is simple. Simply pass a URL to + <a href="apidocs/org/apache/commons/feedparser/locate/FeedLocator.html"> + FeedLocator + </a> which will parse your HTML for your weblog and return all + references to feeds with a + + <a href="apidocs/org/apache/commons/feedparser/FeedList.html"> + FeedList + </a> + </p> + </section> + <!-- + + Feed Type and Version Detection + Relative Link Expansion + + --> + <section name="Liberal Parsing"> <p> @@ -104,6 +128,14 @@ <dt>RSS 0.91</dt> <dt>RSS 0.92</dt> + <!-- + http://www.purplepages.ie/RSS/netscape/rss0.90.html + http://my.netscape.com/publish/formats/rss-spec-0.91.html + http://purl.org/rss/1.0/ + http://backend.userland.com/rss092 + http://backend.userland.com/rss093 + --> + <dt> <a href="http://feedvalidator.org/docs/rss2.html"> @@ -196,9 +228,16 @@ </source> <p> - This is a trivial example from the HelloFeedParser demo - distributed with the app. Other events such as onChannel, - onImage, onLink can be used to obtain additional metadata. + + This is a trivial example from the + + <a href="xref/org/apache/commons/feedparser/example/HelloFeedParser.html"> + HelloFeedParser + </a> + + demo distributed within FeedParser. Other events such as + onChannel, onImage, onLink can be used to obtain additional + metadata. </p> <p> @@ -206,7 +245,11 @@ the future as well as support for additional namespaces. For example the RSS 1.0, 2.0, and Atom specification all support different date mechanisms. The FeedParser simply passes - onCreated, onIssued methods via the MetaFeedParserListener + onCreated, onIssued methods via the + + <a href="apidocs/org/apache/commons/feedparser/MetaFeedParserListener.html"> + MetaFeedParserListener + </a> interface. </p> @@ -234,7 +277,12 @@ </p> <p> - The FeedParser includes a generic ContentFeedParserListener + The FeedParser includes a generic + + <a href="apidocs/org/apache/commons/feedparser/ContentFeedParserListener.html"> + ContentFeedParserListener + </a> + which allows you to intercept all content markup from all RSS formats including Atom. </p> @@ -300,6 +348,14 @@ used to provide a scalable system. </p> + <p> + The Network IO sets a default user agent of: + </p> + + <source> +Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1; aggregator:FeedParser; http://jakarta.apache.org/commons/sandbox/feedparser/) Gecko/20021130 +</source> + </section> <!-- @@ -311,6 +367,42 @@ </section> --> + + <section name="Alternative RSS/Atom and Feed Parsers"> + + <p> + If for some reason FeedParser doesn't meet you needs (and we'd + love to find out why) there are other alternatives. + </p> + + <dl> + <dt> + <a href="https://rome.dev.java.net/">Rome</a> + </dt> + + <dd> + While Rome lacks autodiscovery and a networking layer it + does provide a nice DOM API (if this is what you require) + and the developers from both projects are friendly and + cooperate. + </dd> + + <dt> + <a href="http://sourceforge.net/projects/feedparser/"> + Universal FeedParser + </a> + </dt> + + <dd> + + The Universal FeedParser is a python-based parser which + happens to conflict somewhat in our use of names. + + </dd> + + </dl> + + </section> </body> --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]