burton 2004/09/05 14:55:51 Modified: feedparser TODO feedparser/src/java/org/apache/commons/feedparser/locate ProbeLocator.java feedparser/src/java/org/apache/commons/feedparser/test TestProbeLocator.java Log: 1/2 of brads patches Revision Changes Path 1.13 +6 -0 jakarta-commons-sandbox/feedparser/TODO Index: TODO =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/TODO,v retrieving revision 1.12 retrieving revision 1.13 diff -u -r1.12 -r1.13 --- TODO 3 Sep 2004 19:46:47 -0000 1.12 +++ TODO 5 Sep 2004 21:55:51 -0000 1.13 @@ -1,4 +1,10 @@ +- BUG: + + Make SURE that all these tests + + http://diveintomark.org/tests/client/autodiscovery/ + - BUG: what happens when I put a comment after a UTF-16 BOM?! - Support Base64 Atom values and the ability to enable them. 1.11 +28 -37 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java Index: ProbeLocator.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- ProbeLocator.java 2 Sep 2004 00:36:25 -0000 1.10 +++ ProbeLocator.java 5 Sep 2004 21:55:51 -0000 1.11 @@ -69,7 +69,7 @@ /** * A regex to find any trailing filename and strip it */ - private static Pattern patternToStrip = Pattern.compile("/\\w*\\.\\w*$"); + private static Pattern patternToStrip = Pattern.compile("[^/](/\\w*\\.\\w*$)"); /** * A regex to extract the user from a Xanga URL @@ -143,7 +143,8 @@ new FeedReference("index.rss", FeedReference.RSS_MEDIA_TYPE), new FeedReference("rss.xml", FeedReference.RSS_MEDIA_TYPE), new FeedReference("index.rdf", FeedReference.RSS_MEDIA_TYPE), - new FeedReference("index.xml", FeedReference.XML_MEDIA_TYPE) }; + new FeedReference("index.xml", FeedReference.RSS_MEDIA_TYPE), + new FeedReference("xml/rss.xml", FeedReference.RSS_MEDIA_TYPE) }; probeMapping.put( BlogService.BLOSXOM, blosxomLocations ); @@ -192,6 +193,7 @@ mapping = (FeedReference[])probeMapping.get( blogService ); log.info( "mapping = " + mapping ); + log.info( "baseFeedPath = " + baseFeedPath ); // try out each mapping for (int i = 0; i < mapping.length; i++) { @@ -285,7 +287,9 @@ * and gets the path necessary to build up a feed, such as * "http://www.codinginparadise.org/". Basicly it appends a slash to the end if there * is not one, and removes any file names that might be at the end, such as - * "myweblog.php". The one special exception is for some Blosxom blogs, + * "myweblog.php". + * + * There is a special exception for some Blosxom blogs, * which have things inside of a cgi-script and 'hang' their RSS files * off of this cgi-bin. For example, * http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi has its RSS file @@ -297,31 +301,40 @@ * * @author Brad Neuberg, [EMAIL PROTECTED] */ - protected static String getFeedPath(String resource) + protected static String getFeedPath( String resource ) throws MalformedURLException { // strip off any query string or anchors - int startStripAt = resource.indexOf("#"); + int end = resource.lastIndexOf( "#" ); + + if ( end != -1 ) + resource = resource.substring( 0, end ); - if ( startStripAt == -1 ) { - startStripAt = resource.indexOf("?"); - } - if (startStripAt != -1) { - resource = resource.substring(0, startStripAt); - } + end = resource.lastIndexOf( "?" ); - if (!resource.endsWith("blosxom.cgi")) { + if ( end != -1 ) + resource = resource.substring( 0, end ); + + if ( ! resource.endsWith( "blosxom.cgi" ) ) { Matcher fileMatcher = patternToStrip.matcher(resource); resource = fileMatcher.replaceAll(""); } - if ( !resource.endsWith( "/" ) ) { + if ( ! resource.endsWith( "/" ) ) { resource = resource + "/"; } return resource; } - + + public static void main( String[] args ) throws Exception { + + System.out.println( "asdf" ); + System.out.println( getFeedPath( "http://foo.com/bar?cat=dog" ) ); + System.out.println( getFeedPath( "http://foo.com/bar?cat=dog#adf" ) ); + + } + /** Does an HTTP HEAD to see if the given resource exists. * * @param resource The full URI to the resource to check for. @@ -329,13 +342,8 @@ * @author Brad Neuberg, [EMAIL PROTECTED] */ protected static boolean feedExists(String resource) throws Exception { - - log.info( "Inside feedExists, resource = "+resource ); - ResourceRequest request = ResourceRequestFactory.getResourceRequest( resource ); - log.info( "request = "+request ); - request.setRequestMethod( "HEAD" ); request.setFollowRedirects( false ); @@ -343,7 +351,7 @@ request.getContentLength(); long response = request.getResponseCode(); - log.info( "response = " + response ); + return response == 200; } @@ -356,23 +364,6 @@ xangaMatcher.matches(); return xangaMatcher.group(1); - } - - public static void main( String[] args ) throws Exception { - - FeedList list = new FeedList(); - - locate( "http://davebarry.blogspot.com/", null, list ); - locate( "http://www.livejournal.com/users/jwz", null, list ); - - Iterator it = list.iterator(); - - while ( it.hasNext() ) { - - log.info( it.next() ); - - } - } } 1.3 +328 -3 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestProbeLocator.java Index: TestProbeLocator.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestProbeLocator.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- TestProbeLocator.java 2 Sep 2004 00:36:25 -0000 1.2 +++ TestProbeLocator.java 5 Sep 2004 21:55:51 -0000 1.3 @@ -336,7 +336,7 @@ assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY); assertNull(feeds[0].title, null); assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE); - assertEquals(feeds[0].resource, "http://xanga.com/rss.aspx?user=joe"); + assertEquals(feeds[0].resource, "http://www.xanga.com/rss.aspx?user=lithium98"); /* test through the FeedLocator */ list = FeedLocator.locate(resource); atomFeed = list.getAdAtomFeed(); @@ -344,7 +344,82 @@ assertNull(atomFeed); assertNotNull(rssFeed); assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE); - assertEquals(rssFeed.resource, "http://xanga.com/rss.aspx?user=joe"); + assertEquals(rssFeed.resource, "http://www.xanga.com/rss.aspx?user=lithium98"); + + // This site should have an RSS feed + resource = "http://www.xanga.com/home.aspx?user=ChUnSA_86"; + content = getContent(resource); + assertNotNull(content); + blogService = BlogServiceDiscovery.discover(resource, content); + assertEquals(blogService, BlogService.XANGA); + list = new FeedList(); + ProbeLocator.locate(resource, content, list); + assertEquals(list.size(), 1); + feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]); + assertEquals(feeds.length, 1); + assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY); + assertNull(feeds[0].title, null); + assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE); + assertEquals(feeds[0].resource, "http://www.xanga.com/rss.aspx?user=ChUnSA_86"); + /* test through the FeedLocator */ + list = FeedLocator.locate(resource); + atomFeed = list.getAdAtomFeed(); + rssFeed = list.getAdRSSFeed(); + assertNull(atomFeed); + assertNotNull(rssFeed); + assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE); + assertEquals(rssFeed.resource, "http://www.xanga.com/rss.aspx?user=ChUnSA_86"); + + // This site should have an RSS feed + resource = "http://www.xanga.com/home.aspx?user=wdfphillz"; + content = getContent(resource); + assertNotNull(content); + blogService = BlogServiceDiscovery.discover(resource, content); + assertEquals(blogService, BlogService.XANGA); + list = new FeedList(); + ProbeLocator.locate(resource, content, list); + assertEquals(list.size(), 1); + feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]); + assertEquals(feeds.length, 1); + assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY); + assertNull(feeds[0].title, null); + assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE); + assertEquals(feeds[0].resource, "http://www.xanga.com/rss.aspx?user=wdfphillz"); + /* test through the FeedLocator */ + list = FeedLocator.locate(resource); + atomFeed = list.getAdAtomFeed(); + rssFeed = list.getAdRSSFeed(); + assertNull(atomFeed); + assertNotNull(rssFeed); + assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE); + assertEquals(rssFeed.resource, "http://www.xanga.com/rss.aspx?user=wdfphillz"); + + // This site should have an RSS feed + // FIXME: We should be able to pass this test when we + // expand resources inside of the Feed Parser; we don't + // currently do this yet, Brad Neuberg, [EMAIL PROTECTED] + /*resource = "http://xanga.com/home.aspx?user=joe"; + content = getContent(resource); + assertNotNull(content); + blogService = BlogServiceDiscovery.discover(resource, content); + assertEquals(blogService, BlogService.XANGA); + list = new FeedList(); + ProbeLocator.locate(resource, content, list); + assertEquals(list.size(), 1); + feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]); + assertEquals(feeds.length, 1); + assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY); + assertNull(feeds[0].title, null); + assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE); + assertEquals(feeds[0].resource, "http://xanga.com/rss.aspx?user=joe");*/ + /* test through the FeedLocator */ + /*list = FeedLocator.locate(resource); + atomFeed = list.getAdAtomFeed(); + rssFeed = list.getAdRSSFeed(); + assertNull(atomFeed); + assertNotNull(rssFeed); + assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE); + assertEquals(rssFeed.resource, "http://xanga.com/rss.aspx?user=joe");*/ } public FeedList test( String resource, @@ -681,6 +756,102 @@ public void testPMachine() throws Exception { FeedList list = null; String resource = null, content = null; + + // This site should have a single RSS feed + resource = "http://bamph.com"; + content = getContent(resource); + assertNotNull(content); + blogService = BlogServiceDiscovery.discover(resource, content); + assertEquals(blogService, BlogService.UNKNOWN); + list = new FeedList(); + ProbeLocator.locate(resource, content, list); + assertEquals(list.size(), 1); + feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]); + assertEquals(feeds.length, 1); + assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY); + assertNull(feeds[0].title, null); + assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE); + assertEquals(feeds[0].resource, "http://bamph.com/index.xml"); + /* test through the FeedLocator */ + list = FeedLocator.locate(resource); + atomFeed = list.getAdAtomFeed(); + rssFeed = list.getAdRSSFeed(); + assertNull(atomFeed); + assertNotNull(rssFeed); + assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE); + assertEquals(rssFeed.resource, "http://bamph.com/index.xml"); + + // This site should have a single RSS feed + resource = "http://bamph.com"; + content = getContent(resource); + assertNotNull(content); + blogService = BlogServiceDiscovery.discover(resource, content); + assertEquals(blogService, BlogService.UNKNOWN); + list = new FeedList(); + ProbeLocator.locate(resource, content, list); + assertEquals(list.size(), 1); + feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]); + assertEquals(feeds.length, 1); + assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY); + assertNull(feeds[0].title, null); + assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE); + assertEquals(feeds[0].resource, "http://bamph.com/index.xml"); + /* test through the FeedLocator */ + list = FeedLocator.locate(resource); + atomFeed = list.getAdAtomFeed(); + rssFeed = list.getAdRSSFeed(); + assertNull(atomFeed); + assertNotNull(rssFeed); + assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE); + assertEquals(rssFeed.resource, "http://bamph.com/index.xml"); + + // This site should have a single RSS feed + resource = "http://bamph.com"; + content = getContent(resource); + assertNotNull(content); + blogService = BlogServiceDiscovery.discover(resource, content); + assertEquals(blogService, BlogService.UNKNOWN); + list = new FeedList(); + ProbeLocator.locate(resource, content, list); + assertEquals(list.size(), 1); + feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]); + assertEquals(feeds.length, 1); + assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY); + assertNull(feeds[0].title, null); + assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE); + assertEquals(feeds[0].resource, "http://bamph.com/index.xml"); + /* test through the FeedLocator */ + list = FeedLocator.locate(resource); + atomFeed = list.getAdAtomFeed(); + rssFeed = list.getAdRSSFeed(); + assertNull(atomFeed); + assertNotNull(rssFeed); + assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE); + assertEquals(rssFeed.resource, "http://bamph.com/index.xml"); + + // This site should have a single RSS feed + resource = "http://bamph.com"; + content = getContent(resource); + assertNotNull(content); + blogService = BlogServiceDiscovery.discover(resource, content); + assertEquals(blogService, BlogService.UNKNOWN); + list = new FeedList(); + ProbeLocator.locate(resource, content, list); + assertEquals(list.size(), 1); + feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]); + assertEquals(feeds.length, 1); + assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY); + assertNull(feeds[0].title, null); + assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE); + assertEquals(feeds[0].resource, "http://bamph.com/index.xml"); + /* test through the FeedLocator */ + list = FeedLocator.locate(resource); + atomFeed = list.getAdAtomFeed(); + rssFeed = list.getAdRSSFeed(); + assertNull(atomFeed); + assertNotNull(rssFeed); + assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE); + assertEquals(rssFeed.resource, "http://bamph.com/index.xml"); BlogService blogService = null; FeedReference feeds[] = null; FeedReference rssFeed, atomFeed; @@ -756,6 +927,30 @@ assertNotNull(rssFeed); assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE); assertEquals(rssFeed.resource, "http://www.mondfish.net/pmachine/index.xml"); + + // This site should have a single RSS feed + resource = "http://bamph.com"; + content = getContent(resource); + assertNotNull(content); + blogService = BlogServiceDiscovery.discover(resource, content); + assertEquals(blogService, BlogService.UNKNOWN); + list = new FeedList(); + ProbeLocator.locate(resource, content, list); + assertEquals(list.size(), 1); + feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]); + assertEquals(feeds.length, 1); + assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY); + assertNull(feeds[0].title, null); + assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE); + assertEquals(feeds[0].resource, "http://bamph.com/index.xml"); + /* test through the FeedLocator */ + list = FeedLocator.locate(resource); + atomFeed = list.getAdAtomFeed(); + rssFeed = list.getAdRSSFeed(); + assertNull(atomFeed); + assertNotNull(rssFeed); + assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE); + assertEquals(rssFeed.resource, "http://bamph.com/index.xml"); } public void testBlosxom() throws Exception { @@ -831,6 +1026,32 @@ /* test through the FeedLocator */ list = FeedLocator.locate(resource); atomFeed = list.getAdAtomFeed(); + public static void main( String[] args ) throws Exception { + + TestProbeLocator test = new TestProbeLocator( null ); + + /*test.test( "http://xanga.com/home.aspx?user=joe", + BlogService.XANGA, + 1 ); + + test.test( "http://www.xanga.com/home.aspx?user=joe", + BlogService.XANGA, + 1 );*/ + + test.testBlogger(); + test.testLiveJournal(); + test.testDiaryLand(); + test.testMovableType(); + test.testXanga(); + test.testWordPress(); + test.testAOLJournal(); + test.testTypePad(); + test.testGreyMatter(); + test.testPMachine(); + test.testBlosxom(); + test.testRadioUserland(); + test.testTextPattern(); + } rssFeed = list.getAdRSSFeed(); assertNull(atomFeed); assertNotNull(rssFeed); @@ -855,6 +1076,32 @@ assertEquals(feeds[0].resource, resource + "/index.rss"); /* test through the FeedLocator */ list = FeedLocator.locate(resource); + public static void main( String[] args ) throws Exception { + + TestProbeLocator test = new TestProbeLocator( null ); + + /*test.test( "http://xanga.com/home.aspx?user=joe", + BlogService.XANGA, + 1 ); + + test.test( "http://www.xanga.com/home.aspx?user=joe", + BlogService.XANGA, + 1 );*/ + + test.testBlogger(); + test.testLiveJournal(); + test.testDiaryLand(); + test.testMovableType(); + test.testXanga(); + test.testWordPress(); + test.testAOLJournal(); + test.testTypePad(); + test.testGreyMatter(); + test.testPMachine(); + test.testBlosxom(); + test.testRadioUserland(); + test.testTextPattern(); + } atomFeed = list.getAdAtomFeed(); rssFeed = list.getAdRSSFeed(); assertNull(atomFeed); @@ -879,6 +1126,32 @@ assertEquals(blogService, BlogService.RADIO_USERLAND); list = new FeedList(); ProbeLocator.locate(resource, content, list); + public static void main( String[] args ) throws Exception { + + TestProbeLocator test = new TestProbeLocator( null ); + + /*test.test( "http://xanga.com/home.aspx?user=joe", + BlogService.XANGA, + 1 ); + + test.test( "http://www.xanga.com/home.aspx?user=joe", + BlogService.XANGA, + 1 );*/ + + test.testBlogger(); + test.testLiveJournal(); + test.testDiaryLand(); + test.testMovableType(); + test.testXanga(); + test.testWordPress(); + test.testAOLJournal(); + test.testTypePad(); + test.testGreyMatter(); + test.testPMachine(); + test.testBlosxom(); + test.testRadioUserland(); + test.testTextPattern(); + } assertEquals(list.size(), 1); feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]); assertEquals(feeds.length, 1); @@ -903,6 +1176,32 @@ assertEquals(blogService, BlogService.RADIO_USERLAND); list = new FeedList(); ProbeLocator.locate(resource, content, list); + public static void main( String[] args ) throws Exception { + + TestProbeLocator test = new TestProbeLocator( null ); + + /*test.test( "http://xanga.com/home.aspx?user=joe", + BlogService.XANGA, + 1 ); + + test.test( "http://www.xanga.com/home.aspx?user=joe", + BlogService.XANGA, + 1 );*/ + + test.testBlogger(); + test.testLiveJournal(); + test.testDiaryLand(); + test.testMovableType(); + test.testXanga(); + test.testWordPress(); + test.testAOLJournal(); + test.testTypePad(); + test.testGreyMatter(); + test.testPMachine(); + test.testBlosxom(); + test.testRadioUserland(); + test.testTextPattern(); + } assertEquals(list.size(), 1); feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]); assertEquals(feeds.length, 1); @@ -1002,5 +1301,31 @@ return results.toString(); } + public static void main( String[] args ) throws Exception { + + TestProbeLocator test = new TestProbeLocator( null ); + + /*test.test( "http://xanga.com/home.aspx?user=joe", + BlogService.XANGA, + 1 ); + + test.test( "http://www.xanga.com/home.aspx?user=joe", + BlogService.XANGA, + 1 );*/ + + test.testBlogger(); + test.testLiveJournal(); + test.testDiaryLand(); + test.testMovableType(); + test.testXanga(); + test.testWordPress(); + test.testAOLJournal(); + test.testTypePad(); + test.testGreyMatter(); + test.testPMachine(); + test.testBlosxom(); + test.testRadioUserland(); + test.testTextPattern(); + } }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]