burton      2004/08/05 18:12:13

  Modified:    feedparser/src/java/org/apache/commons/feedparser/locate
                        FeedLocator.java LinkLocator.java
  Log:
  We're a bit more aggressive about doing LinkLocation... we also try to handle
  RSS formats correctly and prefer richer metadata.
  
  Revision  Changes    Path
  1.9       +12 -8     
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java
  
  Index: FeedLocator.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- FeedLocator.java  4 Aug 2004 22:17:01 -0000       1.8
  +++ FeedLocator.java  6 Aug 2004 01:12:12 -0000       1.9
  @@ -64,14 +64,13 @@
   
           DiscoveryLocator.locate( resource, content, list );
   
  -        //this failed... try probe location
  -        //FIXME: if we still fail try location link probing /index.rdf, /index.xml
  -        if ( list.size() == 0 )
  -            ProbeLocator.locate( resource, content, list );
  -
           //this failed... try looking for links
  +        LinkLocator.locate( resource, content, list );
  +
  +        //this failed... try probe location.  This is more reliable than
  +        //LinkLocation but requires a few more HTTP gets.
           if ( list.size() == 0 )
  -            LinkLocator.locate( resource, content, list );
  +            ProbeLocator.locate( resource, content, list );
   
           //FIXME: if we faile to locate with location with link discovery.
   
  @@ -87,12 +86,14 @@
       public static void main( String[] args ) throws Exception {
   
           //This should find http://www.electoral-vote.com/index.rss
  -        String resource = "http://www.electoral-vote.com/";;
  +        //String resource = "http://brendonwilson.com/";
  +
  +        String resource = "file:///projects/feedparser/tests/locate4.html";
   
           //String resource = "http://www.corante.com/strange/";
           //String resource = "http://peerfear.org";
   
  -        List l = locate( resource );
  +        FeedList l = locate( resource );
   
           Iterator it = l.iterator();
   
  @@ -100,6 +101,9 @@
               System.out.println( "NO LINKS FOUND" );
           } 
   
  +        System.out.println( " FIXME: (debug): AD RSS: " + l.getAdRSSFeed() );
  +        System.out.println( " FIXME: (debug): AD Atom: " + l.getAdAtomFeed() );
  +        
           while ( it.hasNext() ) {
   
               FeedReference ref = (FeedReference)it.next();
  
  
  
  1.4       +54 -4     
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/LinkLocator.java
  
  Index: LinkLocator.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/LinkLocator.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- LinkLocator.java  4 Aug 2004 22:17:02 -0000       1.3
  +++ LinkLocator.java  6 Aug 2004 01:12:12 -0000       1.4
  @@ -40,6 +40,13 @@
                                        final FeedList list )
           throws Exception {
   
  +        /**
  +         * When we have been given feeds at a higher level (via <link rel> tags)
  +         * we should prefer these.
  +         */
  +        final boolean hasExplicitRSSFeed = list.getAdRSSFeed() != null;
  +        final boolean hasExplicitAtomFeed = list.getAdRSSFeed() != null;
  +
           AnchorParserListener listener = new AnchorParserListener() {
   
                   String resource = null;
  @@ -48,6 +55,9 @@
   
                   HashSet seen = new HashSet();
   
  +                boolean hasFoundRSSFeed = false;
  +                boolean hasFoundAtomFeed = false;
  +                
                   public void setContext( Object context ) {
   
                       resource = (String)context;
  @@ -60,11 +70,13 @@
                   public Object getResult() {
                       return list;
                   }
  -                
  +
                   public boolean onAnchor( String href, String rel, String title ) {
   
                       String current = ResourceExpander.expand( resource, href );
   
  +                    System.out.println( " FIXME: (debug): current: " + current );
  +
                       if ( current == null )
                           return true; //obviously not
   
  @@ -103,8 +115,13 @@
                           FeedReference ref = new FeedReference( current,
                                                                  
FeedReference.RSS_MEDIA_TYPE );
   
  +                        //Make sure to preserve existing AD feeds first.
  +                        if ( ! hasExplicitRSSFeed )
  +                            list.setAdRSSFeed( ref );
  +
                           list.add( ref );
  -                        list.setAdRSSFeed( ref );
  +
  +                        hasFoundRSSFeed = true;
                           
                       }
   
  @@ -113,16 +130,49 @@
                           FeedReference ref = new FeedReference( current,
                                                                  
FeedReference.ATOM_MEDIA_TYPE );
   
  +                        //Make sure to preserve existing AD feeds first.
  +                        if ( ! hasExplicitAtomFeed )
  +                            list.setAdAtomFeed( ref );
  +
                           list.add( ref );
  -                        list.setAdAtomFeed( ref );
  +
  +                        hasFoundAtomFeed = true;
   
                       }
                       
                       if ( current.endsWith( ".xml" ) ||
                            current.endsWith( ".rdf" ) ) {
   
  +                        //NOTE that we do allow autodiscovery for index.xml
  +                        //and index.rdf files but we don't prefer them since
  +                        //these extensions are generic.  We would prefer to use
  +                        //index.rss or even Atom (though people tend to use Atom
  +                        //autodiscovery now).  This is important because if we
  +                        //spit back an index.xml file that's NOT RSS or worse an
  +                        //index.rdf file that's FOAF then we might break callers.
  +
  +                        FeedReference ref = new FeedReference( current,
  +                                                               
FeedReference.ATOM_MEDIA_TYPE );
  +
  +                        //see if we should RESORT to using this.
  +
  +                        if ( ! hasExplicitRSSFeed && ! hasFoundRSSFeed ) {
  +
  +                            //NOTE: when we have found an existing RDF file use
  +                            //that instead..  This is probably RSS 1.0 which is
  +                            //much better than RSS 0.91
  +
  +                            if ( list.getAdRSSFeed() == null ||
  +                                 list.getAdRSSFeed().resource.endsWith( ".rdf" ) == 
false ) {
  +
  +                                list.setAdRSSFeed( ref );
  +
  +                            }
  +
  +                        }
  +
                           //feed for this blog.
  -                        list.add( current );
  +                        list.add( ref );
                           return true;
                           
                       } 
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to