burton 2004/09/22 18:08:31 Modified: feedparser/src/java/org/apache/commons/feedparser FeedList.java feedparser/src/java/org/apache/commons/feedparser/locate BlogService.java BlogServiceDiscovery.java FeedLocator.java ProbeLocator.java Log: commit of brads textamerica patches... Revision Changes Path 1.3 +7 -1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/FeedList.java Index: FeedList.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/FeedList.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- FeedList.java 20 Aug 2004 21:44:06 -0000 1.2 +++ FeedList.java 23 Sep 2004 01:08:31 -0000 1.3 @@ -104,6 +104,12 @@ setAdRSSFeed( ref ); } + + public void clear() { + super.clear(); + this.adAtomFeed = null; + this.adRSSFeed = null; + } } 1.2 +37 -20 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogService.java Index: BlogService.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogService.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- BlogService.java 31 Aug 2004 23:57:31 -0000 1.1 +++ BlogService.java 23 Sep 2004 01:08:31 -0000 1.2 @@ -23,48 +23,63 @@ * @author BradNeuberg */ public class BlogService { - public static BlogService UNKNOWN = new BlogService(0); - public static BlogService DIARYLAND = new BlogService(1); - public static BlogService AOL_JOURNAL = new BlogService(2); - public static BlogService PMACHINE = new BlogService(3); - public static BlogService TEXTPATTERN = new BlogService(4); + public static BlogService UNKNOWN = new BlogService(0, false); + public static BlogService DIARYLAND = new BlogService(1, true); + public static BlogService AOL_JOURNAL = new BlogService(2, true); + public static 
BlogService PMACHINE = new BlogService(3, true); + public static BlogService TEXTPATTERN = new BlogService(4, true); /* FIXME: We can't detect Manila sites right now. */ - public static BlogService MANILA = new BlogService(5); - public static BlogService TYPEPAD = new BlogService(6); - public static BlogService RADIO_USERLAND = new BlogService(7); - public static BlogService LIVEJOURNAL = new BlogService(8); - public static BlogService WORDPRESS = new BlogService(9); + public static BlogService MANILA = new BlogService(5, true); + public static BlogService TYPEPAD = new BlogService(6, true); + public static BlogService RADIO_USERLAND = new BlogService(7, true); + public static BlogService LIVEJOURNAL = new BlogService(8, true); + public static BlogService WORDPRESS = new BlogService(9, true); /* FIXME: We can't detect iBlog sites right now. */ - public static BlogService IBLOG = new BlogService(10); - public static BlogService XANGA = new BlogService(11); - public static BlogService BLOSXOM = new BlogService(12); - public static BlogService BLOGGER = new BlogService(13); - public static BlogService MOVABLE_TYPE = new BlogService(14); + public static BlogService IBLOG = new BlogService(10, true); + public static BlogService XANGA = new BlogService(11, true); + public static BlogService BLOSXOM = new BlogService(12, true); + public static BlogService BLOGGER = new BlogService(13, true); + public static BlogService MOVABLE_TYPE = new BlogService(14, true); /** FIXME: No way to detect Expression Engine weblogs right now. */ - public static BlogService EXPRESSION_ENGINE = new BlogService(15); - public static BlogService GREYMATTER = new BlogService(16); + public static BlogService EXPRESSION_ENGINE = new BlogService(15, true); + public static BlogService GREYMATTER = new BlogService(16, true); + public static BlogService TEXTAMERICA = new BlogService(17, false); /** The type of BlogService this is, such as BlogService.BLOSXOM. 
*/ private int type; + /** Whether we can trust the results of this blog service's autodiscovery + * links; for example, TextAmerica returns invalid autodiscovery results. + */ + private boolean hasValidAutodiscovery = false; + /** A private constructor to help us do type-safe enumeration. Only called * from within this class. */ - private BlogService(int type) { + private BlogService(int type, boolean hasValidAutodiscovery) { this.type = type; + this.hasValidAutodiscovery = hasValidAutodiscovery; } public int getType() { return type; } + /** Returns whether we can trust the results of this blog service's + * autodiscovery links. For example, TextAmerica returns invalid + * autodiscovery results. + */ + public boolean hasValidAutodiscovery() { + return hasValidAutodiscovery; + } + public String toString() { // use reflection to get the type string; useful so we don't have to // maintain a list of types here. Since this is only used for debugging // purposes its okay to use reflection. try { Field fields[] = getClass().getDeclaredFields(); - BlogService compareMe = new BlogService(type); + BlogService compareMe = new BlogService(type, hasValidAutodiscovery); for (int i = 0; i < fields.length; i++) { // make sure we are dealing with one of our BlogService constants if (fields[i].getType().equals(this.getClass())) { @@ -91,7 +106,9 @@ BlogService compareMe = (BlogService)obj; - return compareMe.getType() == this.type; + // we don't need to check the hasValidAutodiscovery value since equality + // is determined only by the type + return (compareMe.getType() == this.type); } public int hashCode() { 1.2 +11 -0 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogServiceDiscovery.java Index: BlogServiceDiscovery.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogServiceDiscovery.java,v retrieving revision 1.1 
retrieving revision 1.2 diff -u -r1.1 -r1.2 --- BlogServiceDiscovery.java 31 Aug 2004 23:57:31 -0000 1.1 +++ BlogServiceDiscovery.java 23 Sep 2004 01:08:31 -0000 1.2 @@ -113,6 +113,9 @@ else if (isTextPattern(resource, content)) { return BlogService.TEXTPATTERN; } + else if (isTextAmerica(resource, content)) { + return BlogService.TEXTAMERICA; + } else { return BlogService.UNKNOWN; } @@ -261,6 +264,14 @@ Matcher blosxomMatcher = blosxomPattern.matcher(content); results = blosxomMatcher.find(); + + return results; + } + + protected static boolean isTextAmerica( String resource, String content ) { + boolean results = false; + + results = containsDomain(resource, "textamerica.com"); return results; } 1.20 +6 -8 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java Index: FeedLocator.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java,v retrieving revision 1.19 retrieving revision 1.20 diff -u -r1.19 -r1.20 --- FeedLocator.java 2 Sep 2004 00:36:25 -0000 1.19 +++ FeedLocator.java 23 Sep 2004 01:08:31 -0000 1.20 @@ -88,10 +88,8 @@ //this failed... try probe location. This is more reliable than //LinkLocation but requires a few more HTTP gets. - if ( list.size() == 0 ) { - log.info( "Using ProbeLocator..." ); - ProbeLocator.locate( resource, content, list ); - } + log.info( "Using ProbeLocator..." 
); + ProbeLocator.locate( resource, content, list ); return list; @@ -133,17 +131,17 @@ Iterator it = l.iterator(); if ( it.hasNext() == false ) { - System.out.println( "NO LINKS FOUND" ); + log.info( "NO LINKS FOUND" ); } - System.out.println( " FIXME: (debug): AD RSS: " + l.getAdRSSFeed() ); - System.out.println( " FIXME: (debug): AD Atom: " + l.getAdAtomFeed() ); + log.info( " FIXME: (debug): AD RSS: " + l.getAdRSSFeed() ); + log.info( " FIXME: (debug): AD Atom: " + l.getAdAtomFeed() ); while ( it.hasNext() ) { FeedReference ref = (FeedReference)it.next(); - System.out.println( ref.resource ); + log.info( ref.resource ); } 1.13 +24 -9 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java Index: ProbeLocator.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java,v retrieving revision 1.12 retrieving revision 1.13 diff -u -r1.12 -r1.13 --- ProbeLocator.java 14 Sep 2004 01:32:04 -0000 1.12 +++ ProbeLocator.java 23 Sep 2004 01:08:31 -0000 1.13 @@ -101,7 +101,8 @@ { new FeedReference("atom.xml", FeedReference.ATOM_MEDIA_TYPE) }; FeedReference aolJournalLocations[] = - { new FeedReference("rss.xml", FeedReference.RSS_MEDIA_TYPE) }; + { new FeedReference("atom.xml", FeedReference.ATOM_MEDIA_TYPE), + new FeedReference("rss.xml", FeedReference.RSS_MEDIA_TYPE) }; FeedReference pmachineLocations[] = { new FeedReference("index.xml", FeedReference.RSS_MEDIA_TYPE) }; @@ -138,6 +139,9 @@ FeedReference xangaLocations[] = { new FeedReference("rss.aspx?user=", FeedReference.RSS_MEDIA_TYPE) }; + FeedReference textAmericaLocations[] = + { new FeedReference("rss.aspx", FeedReference.RSS_MEDIA_TYPE) }; + FeedReference unknownLocations[] = { new FeedReference("atom.xml",FeedReference.ATOM_MEDIA_TYPE), new FeedReference("index.rss", FeedReference.RSS_MEDIA_TYPE), @@ -166,6 +170,7 @@ probeMapping.put( 
BlogService.IBLOG, iBlogLocations ); probeMapping.put( BlogService.XANGA, xangaLocations); probeMapping.put( BlogService.UNKNOWN, unknownLocations ); + probeMapping.put( BlogService.TEXTAMERICA, textAmericaLocations ); } /** @@ -175,12 +180,21 @@ public static final List locate( String resource, String content, FeedList list ) throws Exception { + // determine what blog service we are dealing with + BlogService blogService = BlogServiceDiscovery.discover( resource, content ); + + // fail-fast if we already have some results and if we determine that + // we can trust the results (TextAmerica has invalid autodiscovery, + // for example) + if ( list.size() > 0 && blogService.hasValidAutodiscovery() ) + return list; + else if ( blogService.hasValidAutodiscovery() == false ) { + // clear out the list so far since we can't trust the results + list.clear(); + } + if ( BLOG_SERVICE_PROBING_ENABLED || AGGRESIVE_PROBING_ENABLED ) { - - // determine what blog service we are dealing with - - BlogService blogService = BlogServiceDiscovery.discover( resource, content ); - + String baseFeedPath = getFeedPath( resource ); FeedReference mapping[] = null; @@ -207,6 +221,7 @@ log.info( "pathToTest = " + pathToTest ); if ( feedExists( pathToTest ) ) { + log.info("Feed exists"); FeedReference feedReference = new FeedReference( pathToTest, mapping[i].type ); feedReference.method = FeedReference.METHOD_PROBE_DISCOVERY; @@ -333,9 +348,8 @@ public static void main( String[] args ) throws Exception { - System.out.println( "asdf" ); - System.out.println( getFeedPath( "http://foo.com/bar?cat=dog" ) ); - System.out.println( getFeedPath( "http://foo.com/bar?cat=dog#adf" ) ); + log.info( getFeedPath( "http://foo.com/bar?cat=dog" ) ); + log.info( getFeedPath( "http://foo.com/bar?cat=dog#adf" ) ); } @@ -355,6 +369,7 @@ request.getContentLength(); long response = request.getResponseCode(); + log.info("response="+response); return response == 200; }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]