burton 2004/10/17 17:00:09 Modified: feedparser/src/java/org/apache/commons/feedparser BaseParser.java RSSFeedParser.java Added: feedparser/tests/locale rss-2.0-en-on-channel-element.xml rss-zh-on-channel-element.xml Log: more support for languages.. this time RSS 2.0 and RSS 0.91 support with dc:language and 'language' Revision Changes Path 1.2 +23 -12 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/BaseParser.java Index: BaseParser.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/BaseParser.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- BaseParser.java 17 Oct 2004 23:43:23 -0000 1.1 +++ BaseParser.java 18 Oct 2004 00:00:09 -0000 1.2 @@ -49,13 +49,11 @@ if ( state.metaFeedParserlistener == null ) return; - Attribute attr = getLocaleAttribute( element ); + String l = getLocaleString( element ); - if ( attr != null ) { + if ( l != null ) { - String v = attr.getValue(); - - Locale locale = RFC3066LocaleParser.parse( v ); + Locale locale = RFC3066LocaleParser.parse( l ); if ( locale != null ) state.metaFeedParserlistener.onLocale( state, locale ); @@ -72,23 +70,36 @@ if ( state.metaFeedParserlistener == null ) return; - Attribute attr = getLocaleAttribute( element ); + String l = getLocaleString( element ); - if ( attr != null ) + if ( l != null ) state.metaFeedParserlistener.onLocaleEnd(); } - protected static Attribute getLocaleAttribute( Element element ) { + protected static String getLocaleString( Element element ) { //hm.. crap. how do we get the 'xml' namespace here? 
Attribute attr = element.getAttribute( "lang" ); + if ( attr != null ) + return attr.getValue(); + //when still null, check whether we have dc:language - if ( attr == null ) - attr = element.getAttribute( "language", NS.DC ); - return attr; + Element lang = element.getChild( "language", NS.DC ); + + if ( lang != null ) + return lang.getText(); + + //fall back to just using "language" and if it isn't a locale string we + //won't parse it. This is for RSS 0.91 and RSS 2.0 content. + lang = element.getChild( "language" ); + + if ( lang != null ) + return lang.getText(); + + return null; } 1.13 +12 -8 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/RSSFeedParser.java Index: RSSFeedParser.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/RSSFeedParser.java,v retrieving revision 1.12 retrieving revision 1.13 diff -u -r1.12 -r1.13 --- RSSFeedParser.java 3 Sep 2004 19:46:47 -0000 1.12 +++ RSSFeedParser.java 18 Oct 2004 00:00:09 -0000 1.13 @@ -38,7 +38,7 @@ * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton (burtonator)</a> * @version $Id$ */ -public class RSSFeedParser { +public class RSSFeedParser extends BaseParser { /** * Parse the given document as an OPML document. @@ -48,7 +48,7 @@ public static void parse( FeedParserListener listener, org.jdom.Document doc ) throws Exception { - FeedParserState state = new FeedParserState(); + FeedParserState state = new FeedParserState( listener ); FeedVersion v = new FeedVersion(); v.isRSS = true; @@ -60,7 +60,9 @@ XPath xpath = new XPath( "/descendant::*[local-name() = 'channel']" ); Element channel = (Element)xpath.selectSingleNode( doc ); state.current = channel; - doParseChannel( listener, state ); + doLocale( state, listener, channel ); + doChannel( listener, state ); + doLocaleEnd( state, listener, channel ); //*** now process the image. 
*** xpath = new XPath( "/descendant::*[local-name() = 'image']" ); @@ -85,11 +87,13 @@ //update items. while ( i.hasNext() ) { - Element child = (Element)i.next(); + Element item = (Element)i.next(); - state.current = child; + state.current = item; + doLocale( state, listener, item ); doParseItem( listener, state ); + doLocaleEnd( state, listener, item ); } @@ -102,8 +106,8 @@ * * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton</a> */ - private static void doParseChannel( FeedParserListener listener, - FeedParserState state ) throws Exception { + private static void doChannel( FeedParserListener listener, + FeedParserState state ) throws Exception { String link = getChildElementTextByName( state, "link" ); 1.1 jakarta-commons-sandbox/feedparser/tests/locale/rss-2.0-en-on-channel-element.xml Index: rss-2.0-en-on-channel-element.xml =================================================================== <?xml version="1.0"?>
<!-- RSS generated by UserLand Frontier v9.0.1 on 10/17/2004; 2:40:35 PM Pacific --> <rss version="2.0"> <channel> <title>Scripting News</title> <link>http://www.scripting.com/</link> <description>It's even worse than it appears.</description> <language>en-us</language> <copyright>Copyright 1997-2004 Dave Winer</copyright> <pubDate>Sun, 17 Oct 2004 07:00:00 GMT</pubDate> <lastBuildDate>Sun, 17 Oct 2004 21:40:35 GMT</lastBuildDate> <docs>http://blogs.law.harvard.edu/tech/rss</docs> <generator>UserLand Frontier v9.0.1</generator> <managingEditor>[EMAIL PROTECTED]</managingEditor> <webMaster>[EMAIL PROTECTED]</webMaster> <item> <description><a href="http://www.dawnanddrew.com/archives/001039.php">I just listened</a> to my first Dawn & Drew podcast. They're awesome. </description> <pubDate>Sun, 17 Oct 2004 19:45:58 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:12:45:58PM</guid> </item> <item> <description>If newspaper <a href="http://www.command-post.org/2004/2_archives/016036.html">endorsements</a> were votes, Kerry wins Florida in a landslide.</description> <pubDate>Sun, 17 Oct 2004 21:40:33 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:2:40:33PM</guid> </item> <item> <description><a href="http://www.scripting.com/images/2001/09/15/usFlag.gif" title="THINK!"><img src="http://www.scripting.com/images/archiveScriptingCom/2004/05/31/think.gif" height="59" width="69" border="0" hspace="15" vspace="15" align="right" alt="THINK!"></a>Watching <a href="http://frist.senate.gov/">Bill Frist</a>, Senate Majority Leader, blame Kerry and Edwards for the failure of the Congress to pass tort reform. I thought I was hearing Bush do the same thing in the last debate. I wonder how many Americans know that Congress is controlled by the Republicans. They talk as if there were an adversarial relationship between the legislative and executive branches, which would lead people to draw the incorrect conclusion that the Democrats are responsible. 
I think today we got a preview of the final onslaught of ads the Republicans are going to run, and there won't be an opportunity to explain that the Democrats don't run Congress. Like the lie that Saddam Hussein was in league with Osama bin Laden, the Republicans don't mind if you draw the wrong conclusion, in fact, they'll help you do it. </description> <pubDate>Sun, 17 Oct 2004 17:44:57 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:10:44:57AM</guid> </item> <item> <description><a href="http://www.nytimes.com/2004/10/17/magazine/17BUSH.html?ei=5088&en=6a9ce1d022952b10&ex=1255752000&partner=rssnyt&pagewanted=print&position=">NY Times Magazine</a>, quoting a senior White House official, in 2002: "We're an empire now, and when we act, we create our own reality."</description> <pubDate>Sun, 17 Oct 2004 18:46:03 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:11:46:03AM</guid> </item> <item> <description><a href="http://www.bloggercon.org/2004/10/17#a1605">Adam Curry</a>: "A lot of people have been questioning the use of licensed music in Podcasts and I too feel that the time has come to face any legal ramifications of this audio wave we're riding now, and not let it take us by surprise."</description> <pubDate>Sun, 17 Oct 2004 17:24:24 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:10:24:24AM</guid> </item> <item> <description><a href="http://www.sfgate.com/cgi-bin/article.cgi?file=/chronicle/archive/2004/10/17/EDG8O98IQ01.DTL">Mitch Kapor</a>: "We were never meant to have a highly centralized government."</description> <pubDate>Sun, 17 Oct 2004 17:21:02 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:10:21:02AM</guid> </item> <item> <description><img src="http://images.scripting.com/archiveScriptingCom/2004/10/17/lessig.jpg" width="45" height="57" border="0" align="right" hspace="15" vspace="5" alt="A picture named lessig.jpg">Emailing with Larry Lessig today, he said something surprising about <a 
href="http://creativecommons.org/">Creative Commons</a>. "No author gives up his copyright when putting content under a CC license. A CC license is just permissions given up front. It rests upon a copyright (without the copyright, you couldn't impose the permissions). But the copyright owner holds the copyright, and just says, 'here's how you're free to use my work.'" </description> <pubDate>Sun, 17 Oct 2004 15:57:45 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:8:57:45AM</guid> </item> <item> <description>Doug Kaye <a href="http://www.itconversations.com/shows/detail225.html">interview</a> with Adam Curry.</description> <pubDate>Sun, 17 Oct 2004 12:15:27 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:5:15:27AM</guid> </item> <item> <description><a href="http://www.iol.co.za/index.php?set_id=1&click_id=2969">RSS news feeds</a> from South Africa.</description> <pubDate>Sun, 17 Oct 2004 14:21:26 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:7:21:26AM</guid> <category>/Technology/Formats and Protocols/RSS</category> </item> <item> <description><a href="http://static.podcatch.com/manila/gems/un/eps.mp3">This is a test</a>. For the next sixty seconds this station will conduct a test of the Emergency Podcast System. </description> <pubDate>Sun, 17 Oct 2004 17:36:38 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:10:36:38AM</guid> <enclosure url="http://static.podcatch.com/manila/gems/un/eps.mp3" length="189455" type="audio/mpeg" /> </item> <item> <description><a href="http://static.podcatch.com/manila/gems/un/anotherTestAudioBlogPost.mp3">I got another</a> test blog post. An audio test blog post. 
Pay no attention to the man behind the curtain.</description> <pubDate>Sun, 17 Oct 2004 19:11:14 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:12:11:14PM</guid> <enclosure url="http://static.podcatch.com/manila/gems/un/anotherTestAudioBlogPost.mp3" length="106423" type="audio/mpeg" /> </item> <item> <description><a href="http://www.mediainfo.com/eandp/news/article_display.jsp?vnu_content_id=1000671941">Editor & Publisher</a> has a list of presidential endorsements.</description> <pubDate>Sun, 17 Oct 2004 12:42:24 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:5:42:24AM</guid> </item> <item> <description><a href="http://www.undergroundclips.com/undergroundclips/2004/10/richard_clark_o.html">Undergroundclips</a> has the 60 Minutes interview with Richard Clark.</description> <pubDate>Sun, 17 Oct 2004 12:28:53 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:5:28:53AM</guid> </item> <item> <description><a href="http://www.nytimes.com/2004/10/17/arts/17rich.html?ex=1255752000&en=ca7f76fa80642517&ei=5088&partner=rssnyt">Frank Rich</a>: "Like the Nixon administration before it, the current White House has kneecapped with impunity any news organization that challenges its message."</description> <pubDate>Sun, 17 Oct 2004 14:16:02 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:7:16:02AM</guid> </item> <item> <description>The Boston Globe <a href="http://www.boston.com/business/articles/2004/10/17/harvards12_billion_man/?rss_id=Boston%20Globe%20--%20Business%20News">profiles</a> Jack Meyer, the investment banker who's in charge of Harvard's $22 billion endowment.</description> <pubDate>Sun, 17 Oct 2004 12:30:44 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:5:30:44AM</guid> </item> <item> <description><img src="http://images.scripting.com/archiveScriptingCom/2004/10/17/dubya.jpg" width="45" height="61" border="0" align="right" hspace="15" vspace="5" alt="A picture named dubya.jpg">In a 
speech yesterday Bush said we will not have an all-volunteer army. A few in the audience shouted, and he <a href="http://www.turkishpress.com/turkishpress/news.asp?ID=30912">flipped</a> it around. They chuckle when Bush makes a mistake, but what if Kerry had said it? Do you think the Republicans would have mocked him? Yeah, I think so. I think the Dems should run that flip-flop as an ad. Fair is fair. And unfair is fair in this election.</description> <pubDate>Sun, 17 Oct 2004 12:03:56 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:5:03:56AM</guid> </item> <item> <description><a href="http://jeremy.zawodny.com/blog/archives/002826.html">Jeremy Zawodny</a>, who works at Yahoo, says MSNBC ripped them off. </description> <pubDate>Sun, 17 Oct 2004 12:01:55 GMT</pubDate> <guid>http://archive.scripting.com/2004/10/17#When:5:01:55AM</guid> </item> </channel> </rss> 1.1 jakarta-commons-sandbox/feedparser/tests/locale/rss-zh-on-channel-element.xml Index: rss-zh-on-channel-element.xml =================================================================== <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/"><channel><title>beiqiao的专栏</title><link>http://blog.csdn.net/beiqiao/</link><description /><dc:language>zh-CHS</dc:language><generator>.Text Version 0.958.2004.2001</generator><item><dc:creator>beiqiao</dc:creator><title>修复IE</title><link>http://blog.csdn.net/beiqiao/archive/2004/10/12/132900.aspx</link><pubDate>Tue, 12 Oct 2004 00:43:00 
GMT</pubDate><guid>http://blog.csdn.net/beiqiao/archive/2004/10/12/132900.aspx</guid><wfw:comment>http://blog.csdn.net/beiqiao/comments/132900.aspx</wfw:comment><comments>http://blog.csdn.net/beiqiao/archive/2004/10/12/132900.aspx#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://blog.csdn.net/beiqiao/comments/commentRss/132900.aspx</wfw:commentRss><trackback:ping>http://blog.csdn.net/beiqiao/services/trackbacks/132900.aspx</trackback:ping><description>发现中毒了,每次启动IE,会打开本地一个sp.html文件。这个文件放在windows临时目录下,即使被删除,IE启动后,又被生成。同时注册表中HKEY_LOCAL_MECHINE\Microsoft\Internet Explorer\main\search bar、search page;HKEY_LOCAL_MECHINE\Microsoft\Internet Explorer\search键值被修改成指向本地的sp.html文件,修改掉这些键值后,打开IE,又被改回来了。<img src ="http://blog.csdn.net/beiqiao/aggbug/132900.aspx" width = "1" height = "1" /></description></item><item><dc:creator>beiqiao</dc:creator><title>Linux相关命令</title><link>http://blog.csdn.net/beiqiao/archive/2004/10/11/131800.aspx</link><pubDate>Mon, 11 Oct 2004 10:55:00 GMT</pubDate><guid>http://blog.csdn.net/beiqiao/archive/2004/10/11/131800.aspx</guid><wfw:comment>http://blog.csdn.net/beiqiao/comments/131800.aspx</wfw:comment><comments>http://blog.csdn.net/beiqiao/archive/2004/10/11/131800.aspx#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://blog.csdn.net/beiqiao/comments/commentRss/131800.aspx</wfw:commentRss><trackback:ping>http://blog.csdn.net/beiqiao/services/trackbacks/131800.aspx</trackback:ping><description>列出常用Linux命令以备查询<img src ="http://blog.csdn.net/beiqiao/aggbug/131800.aspx" width = "1" height = "1" /></description></item><item><dc:creator>beiqiao</dc:creator><title>使用Jmeter对Mysql进行压力测试无法执行多条sql语句问题</title><link>http://blog.csdn.net/beiqiao/archive/2004/10/10/130966.aspx</link><pubDate>Sun, 10 Oct 2004 16:49:00 
GMT</pubDate><guid>http://blog.csdn.net/beiqiao/archive/2004/10/10/130966.aspx</guid><wfw:comment>http://blog.csdn.net/beiqiao/comments/130966.aspx</wfw:comment><comments>http://blog.csdn.net/beiqiao/archive/2004/10/10/130966.aspx#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://blog.csdn.net/beiqiao/comments/commentRss/130966.aspx</wfw:commentRss><trackback:ping>http://blog.csdn.net/beiqiao/services/trackbacks/130966.aspx</trackback:ping><description>使用Jmeter对Mysql进行压力测试无法执行多条sql语句问题<img src ="http://blog.csdn.net/beiqiao/aggbug/130966.aspx" width = "1" height = "1" /></description></item></channel></rss> --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]