burton      2004/09/30 11:00:33

  Modified:    feedparser/src/java/org/apache/commons/feedparser/locate
                        BlogService.java BlogServiceDiscovery.java
                        FeedLocator.java ProbeLocator.java
  Log:
  Fix for yahoo groups
  
  Revision  Changes    Path
  1.3       +68 -18    
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogService.java
  
  Index: BlogService.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogService.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- BlogService.java  23 Sep 2004 01:08:31 -0000      1.2
  +++ BlogService.java  30 Sep 2004 18:00:32 -0000      1.3
  @@ -23,27 +23,59 @@
    * @author  BradNeuberg
    */
   public class BlogService {
  -    public static BlogService UNKNOWN = new BlogService(0, false);
  -    public static BlogService DIARYLAND = new BlogService(1, true);
  -    public static BlogService AOL_JOURNAL = new BlogService(2, true);
  -    public static BlogService PMACHINE = new BlogService(3, true);
  -    public static BlogService TEXTPATTERN = new BlogService(4, true);
  +
  +    public static BlogService UNKNOWN                = new BlogService(0, false);
  +    public static BlogService DIARYLAND              = new BlogService(1, true);
  +    public static BlogService AOL_JOURNAL            = new BlogService(2, true);
  +    public static BlogService PMACHINE               = new BlogService(3, true);
  +    public static BlogService TEXTPATTERN            = new BlogService(4, true);
       /* FIXME: We can't detect Manila sites right now. */
  -    public static BlogService MANILA = new BlogService(5, true);
  -    public static BlogService TYPEPAD = new BlogService(6, true);
  -    public static BlogService RADIO_USERLAND = new BlogService(7, true);
  -    public static BlogService LIVEJOURNAL = new BlogService(8, true);
  -    public static BlogService WORDPRESS = new BlogService(9, true);
  +    public static BlogService MANILA                 = new BlogService(5, true);
  +    public static BlogService TYPEPAD                = new BlogService(6, true);
  +    public static BlogService RADIO_USERLAND         = new BlogService(7, true);
  +    public static BlogService LIVEJOURNAL            = new BlogService(8, true);
  +    public static BlogService WORDPRESS              = new BlogService(9, true);
       /* FIXME: We can't detect iBlog sites right now. */
  -    public static BlogService IBLOG = new BlogService(10, true);
  -    public static BlogService XANGA = new BlogService(11, true);
  -    public static BlogService BLOSXOM = new BlogService(12, true);   
  -    public static BlogService BLOGGER = new BlogService(13, true);
  -    public static BlogService MOVABLE_TYPE = new BlogService(14, true); 
  +    public static BlogService IBLOG                  = new BlogService(10, true);
  +    public static BlogService XANGA                  = new BlogService(11, true);
  +    public static BlogService BLOSXOM                = new BlogService(12, true);   
  +    public static BlogService BLOGGER                = new BlogService(13, true);
  +    public static BlogService MOVABLE_TYPE           = new BlogService(14, true); 
       /** FIXME: No way to detect Expression Engine weblogs right now. */
  -    public static BlogService EXPRESSION_ENGINE = new BlogService(15, true);  
  -    public static BlogService GREYMATTER = new BlogService(16, true);
  -    public static BlogService TEXTAMERICA = new BlogService(17, false);
  +    public static BlogService EXPRESSION_ENGINE      = new BlogService(15, true);  
  +    public static BlogService GREYMATTER             = new BlogService(16, true);
  +    public static BlogService TEXTAMERICA            = new BlogService(17, false);
  +
  +    public static BlogService YAHOOGROUPS            = new BlogService(18, false) {
  +
  +            public String getFeedResource( String resource ) {
  +
  +//                 * Input: http://groups.yahoo.com/group/aggregators/
  +//                 *
  +//                 * Output: http://rss.groups.yahoo.com/group/aggregators/rss
  +
  +                if ( resource == null )
  +                    return null;
  +
  +                if ( resource.indexOf( "/group/" ) != -1  &&
  +                     resource.indexOf( "groups.yahoo.com" ) != -1 ) {
  +
  +                    resource = "http://rss." +
  +                        resource.substring( "http://".length(), resource.length() )
  +                        ;
  +
  +                    if ( resource.endsWith( "/" ) ) {
  +                        resource += "rss";
  +                    } else {
  +                        resource += "/rss";
  +                    }
  +
  +                } 
  +
  +                return resource;
  +            }
  +
  +        };
       
       /** The type of BlogService this is, such as BlogService.BLOSXOM. */
       private int type;
  @@ -71,6 +103,24 @@
        */
       public boolean hasValidAutodiscovery() {
           return hasValidAutodiscovery;
  +    }
  +
  +    /**
  +     * Get the primary feed resource for this service to test.
  +     *
  +     * For example we're given the html URL and need to return the Feed URL to
  +     * test.
  +     *
  +     * Input: http://groups.yahoo.com/group/aggregators/
  +     *
  +     * Output: http://rss.groups.yahoo.com/group/aggregators/rss
  +     *
  +     * If no changes are necessary just return the input resource.
  +     * 
  +     * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton</a>
  +     */
  +    public String getFeedResource( String resource ) {
  +        return resource;
       }
       
       public String toString() {
  
  
  
  1.3       +22 -5     
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogServiceDiscovery.java
  
  Index: BlogServiceDiscovery.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogServiceDiscovery.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- BlogServiceDiscovery.java 23 Sep 2004 01:08:31 -0000      1.2
  +++ BlogServiceDiscovery.java 30 Sep 2004 18:00:32 -0000      1.3
  @@ -56,6 +56,8 @@
       public static BlogService discover( String resource ) {
           return discoverService( resource, null );
       }
  +
  +    // NOTE: ALL of this code should change to a visitor pattern.  Bad design.
       
       protected static BlogService discoverService( String resource, 
                                                     String content ) {
  @@ -107,20 +109,25 @@
           else if (isManila(resource, content)) {
               return BlogService.MANILA;
           }*/
  -        else if (isRadioUserland(resource, content)) {
  +        else if ( isRadioUserland( resource, content ) ) {
               return BlogService.RADIO_USERLAND;
           }
  -        else if (isTextPattern(resource, content)) {
  +        else if ( isTextPattern( resource, content ) ) {
               return BlogService.TEXTPATTERN;
           }
  -        else if (isTextAmerica(resource, content)) {
  +        else if ( isTextAmerica( resource, content ) ) {
               return BlogService.TEXTAMERICA;
           }
  +        else if ( isYahooGroups( resource, content ) ) {
  +            return BlogService.YAHOOGROUPS;
  +        }
           else {
               return BlogService.UNKNOWN;
           }
       }
  -    
  +
  +    // **** vendor specific CMS detection code **********************************
  +
       protected static boolean isBlogger( String resource, String content ) {
           boolean results = false;
           
  @@ -275,7 +282,17 @@
           
           return results;
       }
  -    
  +
  +    protected static boolean isYahooGroups( String resource, String content ) {
  +        boolean results = false;
  +        
  +        results = containsDomain( resource, "groups.yahoo.com" );
  +        
  +        return results;
  +    }
  +
  +    // **** util code ***********************************************************
  +
       /** Determines if the given resource contains the given domain name
        *  fragment.
        */
  
  
  
  1.21      +10 -5     
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java
  
  Index: FeedLocator.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java,v
  retrieving revision 1.20
  retrieving revision 1.21
  diff -u -r1.20 -r1.21
  --- FeedLocator.java  23 Sep 2004 01:08:31 -0000      1.20
  +++ FeedLocator.java  30 Sep 2004 18:00:32 -0000      1.21
  @@ -104,7 +104,9 @@
           //String resource = "file:///projects/feedparser/tests/locate5.html";
           //String resource = "file:///projects/feedparser/tests/locate6.html";
   
  -        String resource = "file:///projects/feedparser/tests/locate8.html";
  +        String resource = "http://groups.yahoo.com/group/aggregators/";
  +        
  +        //String resource = "file:///projects/feedparser/tests/locate8.html";
   
           //String resource = "http://blogs.sun.com/roller/page/gonzo";
   
  @@ -126,22 +128,25 @@
           //String resource = "http://www.corante.com/strange/";
           //String resource = "http://peerfear.org";
   
  +        ProbeLocator.BLOG_SERVICE_PROBING_ENABLED = true;
  +        ProbeLocator.AGGRESIVE_PROBING_ENABLED = true;
  +
           FeedList l = locate( resource );
   
           Iterator it = l.iterator();
   
           if ( it.hasNext() == false ) {
  -            log.info( "NO LINKS FOUND" );
  +            System.out.println( "NO LINKS FOUND" );
           } 
   
  -        log.info( " FIXME: (debug): AD RSS: " + l.getAdRSSFeed() );
  -        log.info( " FIXME: (debug): AD Atom: " + l.getAdAtomFeed() );
  +        System.out.println( " FIXME: (debug): AD RSS: " + l.getAdRSSFeed() );
  +        System.out.println( " FIXME: (debug): AD Atom: " + l.getAdAtomFeed() );
           
           while ( it.hasNext() ) {
   
               FeedReference ref = (FeedReference)it.next();
   
  -            log.info( ref.resource );
  +            System.out.println( ref.resource );
               
           }
   
  
  
  
  1.14      +17 -2     
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java
  
  Index: ProbeLocator.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java,v
  retrieving revision 1.13
  retrieving revision 1.14
  diff -u -r1.13 -r1.14
  --- ProbeLocator.java 23 Sep 2004 01:08:31 -0000      1.13
  +++ ProbeLocator.java 30 Sep 2004 18:00:32 -0000      1.14
  @@ -150,6 +150,9 @@
                 new FeedReference("index.xml", FeedReference.RSS_MEDIA_TYPE),
                 new FeedReference("xml/rss.xml", FeedReference.RSS_MEDIA_TYPE) };
                 
  +        FeedReference yahooGroupsLocations[] =
  +            { new FeedReference( "", FeedReference.RSS_MEDIA_TYPE) };
  +
           probeMapping.put( BlogService.BLOSXOM,        blosxomLocations );
   
           //Tue Aug 31 2004 04:21 PM ([EMAIL PROTECTED]): Diaryland doesn't
  @@ -169,8 +172,11 @@
           probeMapping.put( BlogService.WORDPRESS,      wordPressLocations );
           probeMapping.put( BlogService.IBLOG,          iBlogLocations );
           probeMapping.put( BlogService.XANGA,          xangaLocations);
  +        probeMapping.put( BlogService.YAHOOGROUPS,    yahooGroupsLocations);
  +
           probeMapping.put( BlogService.UNKNOWN,        unknownLocations );
           probeMapping.put( BlogService.TEXTAMERICA,    textAmericaLocations );
  +
       }
       
       /**
  @@ -192,9 +198,9 @@
               // clear out the list so far since we can't trust the results
               list.clear();
           }
  -        
  +
           if ( BLOG_SERVICE_PROBING_ENABLED || AGGRESIVE_PROBING_ENABLED ) {
  -           
  +
               String baseFeedPath = getFeedPath( resource );
   
               FeedReference mapping[] = null;
  @@ -212,10 +218,19 @@
                   // try out each mapping
                   for (int i = 0; i < mapping.length; i++) {
                       String pathToTest = baseFeedPath + mapping[i].resource;
  +
  +                    //FIXME: generalize this in the future.  We should NOT have
  +                    //custom tests here.
                       
                       // we have to do special probing for Xanga
                       if ( blogService.equals( BlogService.XANGA ) ) {
                           pathToTest += getXangaUser(resource);
  +                    }
  +
  +                    if ( blogService.equals( BlogService.YAHOOGROUPS ) ) {
  +
  +                        pathToTest = BlogService.YAHOOGROUPS.getFeedResource( resource );
  +                        
                       }
   
                       log.info( "pathToTest = " + pathToTest );
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to