burton 2004/09/30 11:00:33 Modified: feedparser/src/java/org/apache/commons/feedparser/locate BlogService.java BlogServiceDiscovery.java FeedLocator.java ProbeLocator.java Log: Fix for yahoo groups Revision Changes Path 1.3 +68 -18 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogService.java Index: BlogService.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogService.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- BlogService.java 23 Sep 2004 01:08:31 -0000 1.2 +++ BlogService.java 30 Sep 2004 18:00:32 -0000 1.3 @@ -23,27 +23,59 @@ * @author BradNeuberg */ public class BlogService { - public static BlogService UNKNOWN = new BlogService(0, false); - public static BlogService DIARYLAND = new BlogService(1, true); - public static BlogService AOL_JOURNAL = new BlogService(2, true); - public static BlogService PMACHINE = new BlogService(3, true); - public static BlogService TEXTPATTERN = new BlogService(4, true); + + public static BlogService UNKNOWN = new BlogService(0, false); + public static BlogService DIARYLAND = new BlogService(1, true); + public static BlogService AOL_JOURNAL = new BlogService(2, true); + public static BlogService PMACHINE = new BlogService(3, true); + public static BlogService TEXTPATTERN = new BlogService(4, true); /* FIXME: We can't detect Manila sites right now. */ - public static BlogService MANILA = new BlogService(5, true); - public static BlogService TYPEPAD = new BlogService(6, true); - public static BlogService RADIO_USERLAND = new BlogService(7, true); - public static BlogService LIVEJOURNAL = new BlogService(8, true); - public static BlogService WORDPRESS = new BlogService(9, true); + public static BlogService MANILA = new BlogService(5, true); + public static BlogService TYPEPAD = new BlogService(6, true); + public static BlogService RADIO_USERLAND = new BlogService(7, true); + public static BlogService LIVEJOURNAL = new BlogService(8, true); + public static BlogService WORDPRESS = new BlogService(9, true); /* FIXME: We can't detect iBlog sites right now. */ - public static BlogService IBLOG = new BlogService(10, true); - public static BlogService XANGA = new BlogService(11, true); - public static BlogService BLOSXOM = new BlogService(12, true); - public static BlogService BLOGGER = new BlogService(13, true); - public static BlogService MOVABLE_TYPE = new BlogService(14, true); + public static BlogService IBLOG = new BlogService(10, true); + public static BlogService XANGA = new BlogService(11, true); + public static BlogService BLOSXOM = new BlogService(12, true); + public static BlogService BLOGGER = new BlogService(13, true); + public static BlogService MOVABLE_TYPE = new BlogService(14, true); /** FIXME: No way to detect Expression Engine weblogs right now. */ - public static BlogService EXPRESSION_ENGINE = new BlogService(15, true); - public static BlogService GREYMATTER = new BlogService(16, true); - public static BlogService TEXTAMERICA = new BlogService(17, false); + public static BlogService EXPRESSION_ENGINE = new BlogService(15, true); + public static BlogService GREYMATTER = new BlogService(16, true); + public static BlogService TEXTAMERICA = new BlogService(17, false); + + public static BlogService YAHOOGROUPS = new BlogService(18, false) { + + public String getFeedResource( String resource ) { + +// * Input: http://groups.yahoo.com/group/aggregators/ +// * +// * Output: http://rss.groups.yahoo.com/group/aggregators/rss + + if ( resource == null ) + return null; + + if ( resource.indexOf( "/group/" ) != -1 && + resource.indexOf( "groups.yahoo.com" ) != -1 ) { + + resource = "http://rss." + + resource.substring( "http://".length(), resource.length() ) + ; + + if ( resource.endsWith( "/" ) ) { + resource += "rss"; + } else { + resource += "/rss"; + } + + } + + return resource; + } + + }; /** The type of BlogService this is, such as BlogService.BLOSXOM. */ private int type; @@ -71,6 +103,24 @@ */ public boolean hasValidAutodiscovery() { return hasValidAutodiscovery; + } + + /** + * Get the primary feed resource for this service to test. + * + * For example we're given the html URL and need to return the Feed URL to + * test. + * + * Input: http://groups.yahoo.com/group/aggregators/ + * + * Output: http://rss.groups.yahoo.com/group/aggregators/rss + * + * If no changes are necessary just return the input resource. + * + * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton</a> + */ + public String getFeedResource( String resource ) { + return resource; } public String toString() { 1.3 +22 -5 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogServiceDiscovery.java Index: BlogServiceDiscovery.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogServiceDiscovery.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- BlogServiceDiscovery.java 23 Sep 2004 01:08:31 -0000 1.2 +++ BlogServiceDiscovery.java 30 Sep 2004 18:00:32 -0000 1.3 @@ -56,6 +56,8 @@ public static BlogService discover( String resource ) { return discoverService( resource, null ); } + + // NOTE: ALL of this code should change to a visitor pattern. Bad design. protected static BlogService discoverService( String resource, String content ) { @@ -107,20 +109,25 @@ else if (isManila(resource, content)) { return BlogService.MANILA; }*/ - else if (isRadioUserland(resource, content)) { + else if ( isRadioUserland( resource, content ) ) { return BlogService.RADIO_USERLAND; } - else if (isTextPattern(resource, content)) { + else if ( isTextPattern( resource, content ) ) { return BlogService.TEXTPATTERN; } - else if (isTextAmerica(resource, content)) { + else if ( isTextAmerica( resource, content ) ) { return BlogService.TEXTAMERICA; } + else if ( isYahooGroups( resource, content ) ) { + return BlogService.YAHOOGROUPS; + } else { return BlogService.UNKNOWN; } } - + + // **** vendor specific CMS detection code ********************************** + protected static boolean isBlogger( String resource, String content ) { boolean results = false; @@ -275,7 +282,17 @@ return results; } - + + protected static boolean isYahooGroups( String resource, String content ) { + boolean results = false; + + results = containsDomain( resource, "groups.yahoo.com" ); + + return results; + } + + // **** util code *********************************************************** + /** Determines if the given resource contains the given domain name * fragment. */ 1.21 +10 -5 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java Index: FeedLocator.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java,v retrieving revision 1.20 retrieving revision 1.21 diff -u -r1.20 -r1.21 --- FeedLocator.java 23 Sep 2004 01:08:31 -0000 1.20 +++ FeedLocator.java 30 Sep 2004 18:00:32 -0000 1.21 @@ -104,7 +104,9 @@ //String resource = "file:///projects/feedparser/tests/locate5.html"; //String resource = "file:///projects/feedparser/tests/locate6.html"; - String resource = "file:///projects/feedparser/tests/locate8.html"; + String resource = "http://groups.yahoo.com/group/aggregators/"; + + //String resource = "file:///projects/feedparser/tests/locate8.html"; //String resource = "http://blogs.sun.com/roller/page/gonzo"; @@ -126,22 +128,25 @@ //String resource = "http://www.corante.com/strange/"; //String resource = "http://peerfear.org"; + ProbeLocator.BLOG_SERVICE_PROBING_ENABLED = true; + ProbeLocator.AGGRESIVE_PROBING_ENABLED = true; + FeedList l = locate( resource ); Iterator it = l.iterator(); if ( it.hasNext() == false ) { - log.info( "NO LINKS FOUND" ); + System.out.println( "NO LINKS FOUND" ); } - log.info( " FIXME: (debug): AD RSS: " + l.getAdRSSFeed() ); - log.info( " FIXME: (debug): AD Atom: " + l.getAdAtomFeed() ); + System.out.println( " FIXME: (debug): AD RSS: " + l.getAdRSSFeed() ); + System.out.println( " FIXME: (debug): AD Atom: " + l.getAdAtomFeed() ); while ( it.hasNext() ) { FeedReference ref = (FeedReference)it.next(); - log.info( ref.resource ); + System.out.println( ref.resource ); } 1.14 +17 -2 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java Index: ProbeLocator.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java,v retrieving revision 1.13 retrieving revision 1.14 diff -u -r1.13 -r1.14 --- ProbeLocator.java 23 Sep 2004 01:08:31 -0000 1.13 +++ ProbeLocator.java 30 Sep 2004 18:00:32 -0000 1.14 @@ -150,6 +150,9 @@ new FeedReference("index.xml", FeedReference.RSS_MEDIA_TYPE), new FeedReference("xml/rss.xml", FeedReference.RSS_MEDIA_TYPE) }; + FeedReference yahooGroupsLocations[] = + { new FeedReference( "", FeedReference.RSS_MEDIA_TYPE) }; + probeMapping.put( BlogService.BLOSXOM, blosxomLocations ); //Tue Aug 31 2004 04:21 PM ([EMAIL PROTECTED]): Diaryland doesn't @@ -169,8 +172,11 @@ probeMapping.put( BlogService.WORDPRESS, wordPressLocations ); probeMapping.put( BlogService.IBLOG, iBlogLocations ); probeMapping.put( BlogService.XANGA, xangaLocations); + probeMapping.put( BlogService.YAHOOGROUPS, yahooGroupsLocations); + probeMapping.put( BlogService.UNKNOWN, unknownLocations ); probeMapping.put( BlogService.TEXTAMERICA, textAmericaLocations ); + } /** @@ -192,9 +198,9 @@ // clear out the list so far since we can't trust the results list.clear(); } - + if ( BLOG_SERVICE_PROBING_ENABLED || AGGRESIVE_PROBING_ENABLED ) { - + String baseFeedPath = getFeedPath( resource ); FeedReference mapping[] = null; @@ -212,10 +218,19 @@ // try out each mapping for (int i = 0; i < mapping.length; i++) { String pathToTest = baseFeedPath + mapping[i].resource; + + //FIXME: generalize this in the future. We should NOT have + //custom tests here. // we have to do special probing for Xanga if ( blogService.equals( BlogService.XANGA ) ) { pathToTest += getXangaUser(resource); + } + + if ( blogService.equals( BlogService.YAHOOGROUPS ) ) { + + pathToTest = BlogService.YAHOOGROUPS.getFeedResource( resource ); + } log.info( "pathToTest = " + pathToTest );
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]