burton 2004/08/05 18:12:13 Modified: feedparser/src/java/org/apache/commons/feedparser/locate FeedLocator.java LinkLocator.java Log: We're a bit more aggressive about doing LinkLocation... we also try to handle using RSS formats correctly and prefer richer metadata Revision Changes Path 1.9 +12 -8 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java Index: FeedLocator.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- FeedLocator.java 4 Aug 2004 22:17:01 -0000 1.8 +++ FeedLocator.java 6 Aug 2004 01:12:12 -0000 1.9 @@ -64,14 +64,13 @@ DiscoveryLocator.locate( resource, content, list ); - //this failed... try probe location - //FIXME: if we still fail try location link probing /index.rdf, /index.xml - if ( list.size() == 0 ) - ProbeLocator.locate( resource, content, list ); - //this failed... try looking for links + LinkLocator.locate( resource, content, list ); + + //this failed... try probe location. This is more reliable than + //LinkLocation but requires a few more HTTP gets. if ( list.size() == 0 ) - LinkLocator.locate( resource, content, list ); + ProbeLocator.locate( resource, content, list ); //FIXME: if we faile to locate with location with link discovery. 
@@ -87,12 +86,14 @@ public static void main( String[] args ) throws Exception { //This should find http://www.electoral-vote.com/index.rss - String resource = "http://www.electoral-vote.com/"; + //String resource = "http://brendonwilson.com/"; + + String resource = "file:///projects/feedparser/tests/locate4.html"; //String resource = "http://www.corante.com/strange/"; //String resource = "http://peerfear.org"; - List l = locate( resource ); + FeedList l = locate( resource ); Iterator it = l.iterator(); @@ -100,6 +101,9 @@ System.out.println( "NO LINKS FOUND" ); } + System.out.println( " FIXME: (debug): AD RSS: " + l.getAdRSSFeed() ); + System.out.println( " FIXME: (debug): AD Atom: " + l.getAdAtomFeed() ); + while ( it.hasNext() ) { FeedReference ref = (FeedReference)it.next(); 1.4 +54 -4 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/LinkLocator.java Index: LinkLocator.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/LinkLocator.java,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- LinkLocator.java 4 Aug 2004 22:17:02 -0000 1.3 +++ LinkLocator.java 6 Aug 2004 01:12:12 -0000 1.4 @@ -40,6 +40,13 @@ final FeedList list ) throws Exception { + /** + * When we have been given feeds at a higher level (via <link rel> tags) + * we should prefer these. 
+ */ + final boolean hasExplicitRSSFeed = list.getAdRSSFeed() != null; + final boolean hasExplicitAtomFeed = list.getAdRSSFeed() != null; + AnchorParserListener listener = new AnchorParserListener() { String resource = null; @@ -48,6 +55,9 @@ HashSet seen = new HashSet(); + boolean hasFoundRSSFeed = false; + boolean hasFoundAtomFeed = false; + public void setContext( Object context ) { resource = (String)context; @@ -60,11 +70,13 @@ public Object getResult() { return list; } - + public boolean onAnchor( String href, String rel, String title ) { String current = ResourceExpander.expand( resource, href ); + System.out.println( " FIXME: (debug): current: " + current ); + if ( current == null ) return true; //obviously not @@ -103,8 +115,13 @@ FeedReference ref = new FeedReference( current, FeedReference.RSS_MEDIA_TYPE ); + //Make sure to preserve existing AD feeds first. + if ( ! hasExplicitRSSFeed ) + list.setAdRSSFeed( ref ); + list.add( ref ); - list.setAdRSSFeed( ref ); + + hasFoundRSSFeed = true; } @@ -113,16 +130,49 @@ FeedReference ref = new FeedReference( current, FeedReference.ATOM_MEDIA_TYPE ); + //Make sure to preserve existing AD feeds first. + if ( ! hasExplicitAtomFeed ) + list.setAdAtomFeed( ref ); + list.add( ref ); - list.setAdAtomFeed( ref ); + + hasFoundAtomFeed = true; } if ( current.endsWith( ".xml" ) || current.endsWith( ".rdf" ) ) { + //NOTE that we do allow autodiscovery for index.xml + //and index.rdf files but we don't prefer them since + //these extensions are generic. We would prefer to use + //index.rss or even Atom (though people tend to use Atom + //autodiscovery now). This is important because if we + //spit back an index.xml file that's NOT RSS or worse an + //index.rdf file that's FOAF then we might break callers. + + FeedReference ref = new FeedReference( current, + FeedReference.ATOM_MEDIA_TYPE ); + + //see if we should RESORT to using this. + + if ( ! hasExplicitRSSFeed && ! 
hasFoundRSSFeed ) { + + //NOTE: when we have found an existing RDF file use + //that instead.. This is probably RSS 1.0 which is + //much better than RSS 0.91 + + if ( list.getAdRSSFeed() == null || + list.getAdRSSFeed().resource.endsWith( ".rdf" ) == false ) { + + list.setAdRSSFeed( ref ); + + } + + } + //feed for this blog. - list.add( current ); + list.add( ref ); return true; }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]