burton      2004/10/17 17:00:09

  Modified:    feedparser/src/java/org/apache/commons/feedparser
                        BaseParser.java RSSFeedParser.java
  Added:       feedparser/tests/locale rss-2.0-en-on-channel-element.xml
                        rss-zh-on-channel-element.xml
  Log:
  More support for languages: this time RSS 2.0 and RSS 0.91 are supported via the 
dc:language and 'language' elements.
  
  Revision  Changes    Path
  1.2       +23 -12    
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/BaseParser.java
  
  Index: BaseParser.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/BaseParser.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- BaseParser.java   17 Oct 2004 23:43:23 -0000      1.1
  +++ BaseParser.java   18 Oct 2004 00:00:09 -0000      1.2
  @@ -49,13 +49,11 @@
           if ( state.metaFeedParserlistener == null )
               return;
   
  -        Attribute attr = getLocaleAttribute( element );
  +        String l = getLocaleString( element );
           
  -        if ( attr != null ) {
  +        if ( l != null ) {
   
  -            String v = attr.getValue();
  -
  -            Locale locale = RFC3066LocaleParser.parse( v );
  +            Locale locale = RFC3066LocaleParser.parse( l );
   
               if ( locale != null )
                   state.metaFeedParserlistener.onLocale( state, locale );
  @@ -72,23 +70,36 @@
           if ( state.metaFeedParserlistener == null )
               return;
   
  -        Attribute attr = getLocaleAttribute( element );
  +        String l = getLocaleString( element );
   
  -        if ( attr != null ) 
  +        if ( l != null ) 
               state.metaFeedParserlistener.onLocaleEnd();
   
       }
   
  -    protected static Attribute getLocaleAttribute( Element element ) {
  +    protected static String getLocaleString( Element element ) {
   
           //hm.. crap. how do we get the 'xml' namespace here?
           Attribute attr = element.getAttribute( "lang" );
   
  +        if ( attr != null )
  +            return attr.getValue();
  +        
           //when still null see that we have dc:language
  -        if ( attr == null )
  -            attr = element.getAttribute( "language", NS.DC );
   
  -        return attr;
  +        Element lang = element.getChild( "language", NS.DC );
  +
  +        if ( lang != null )
  +            return lang.getText();
  +
  +        //fall back to just using "language" and if it isn't a locale string we
  +        //won't parse it.  This is for RSS 0.91 and RSS 2.0 content.
  +        lang = element.getChild( "language" );
  +
  +        if ( lang != null )
  +            return lang.getText();
  +
  +        return null;
   
       }
       
  
  
  
  1.13      +12 -8     
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/RSSFeedParser.java
  
  Index: RSSFeedParser.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/RSSFeedParser.java,v
  retrieving revision 1.12
  retrieving revision 1.13
  diff -u -r1.12 -r1.13
  --- RSSFeedParser.java        3 Sep 2004 19:46:47 -0000       1.12
  +++ RSSFeedParser.java        18 Oct 2004 00:00:09 -0000      1.13
  @@ -38,7 +38,7 @@
    * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton (burtonator)</a>
    * @version $Id$
    */
  -public class RSSFeedParser {
  +public class RSSFeedParser extends BaseParser {
   
       /**
        * Parse the given document as an RSS document.
  @@ -48,7 +48,7 @@
       public static void parse( FeedParserListener listener,
                                 org.jdom.Document doc ) throws Exception {
   
  -        FeedParserState state = new FeedParserState();
  +        FeedParserState state = new FeedParserState( listener );
   
           FeedVersion v = new FeedVersion();
           v.isRSS = true;
  @@ -60,7 +60,9 @@
           XPath xpath = new XPath( "/descendant::*[local-name() = 'channel']" );
           Element channel = (Element)xpath.selectSingleNode( doc );
           state.current = channel;
  -        doParseChannel( listener, state );
  +        doLocale( state, listener, channel );
  +        doChannel( listener, state );
  +        doLocaleEnd( state, listener, channel );
   
           //*** now process the image. ***
           xpath = new XPath( "/descendant::*[local-name() = 'image']" );
  @@ -85,11 +87,13 @@
           //update items.
           while ( i.hasNext() ) {
   
  -            Element child = (Element)i.next();
  +            Element item = (Element)i.next();
   
  -            state.current = child;
  +            state.current = item;
                   
  +            doLocale( state, listener, item );
               doParseItem( listener, state );
  +            doLocaleEnd( state, listener, item );
   
           }
   
  @@ -102,8 +106,8 @@
        *
        * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton</a>
        */
  -    private static void doParseChannel( FeedParserListener listener,
  -                                        FeedParserState state ) throws Exception {
  +    private static void doChannel( FeedParserListener listener,
  +                                   FeedParserState state ) throws Exception {
   
           String link = getChildElementTextByName( state, "link" );
   
  
  
  
  1.1                  
jakarta-commons-sandbox/feedparser/tests/locale/rss-2.0-en-on-channel-element.xml
  
  Index: rss-2.0-en-on-channel-element.xml
  ===================================================================
  <?xml version="1.0"?>

  <!-- RSS generated by UserLand Frontier v9.0.1 on 10/17/2004; 2:40:35 PM Pacific -->

  <rss version="2.0">

        <channel>

                <title>Scripting News</title>

                <link>http://www.scripting.com/</link>

                <description>It's even worse than it appears.</description>

                <language>en-us</language>

                <copyright>Copyright 1997-2004 Dave Winer</copyright>

                <pubDate>Sun, 17 Oct 2004 07:00:00 GMT</pubDate>

                <lastBuildDate>Sun, 17 Oct 2004 21:40:35 GMT</lastBuildDate>

                <docs>http://blogs.law.harvard.edu/tech/rss</docs>

                <generator>UserLand Frontier v9.0.1</generator>

                <managingEditor>[EMAIL PROTECTED]</managingEditor>

                <webMaster>[EMAIL PROTECTED]</webMaster>

                <item>

                        <description>&lt;a 
href=&quot;http://www.dawnanddrew.com/archives/001039.php&quot;&gt;I just 
listened&lt;/a&gt; to my first Dawn &amp; Drew podcast. They're awesome. </description>

                        <pubDate>Sun, 17 Oct 2004 19:45:58 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:12:45:58PM</guid>

                        </item>

                <item>

                        <description>If newspaper &lt;a 
href=&quot;http://www.command-post.org/2004/2_archives/016036.html&quot;&gt;endorsements&lt;/a&gt;
 were votes, Kerry wins Florida in a landslide.</description>

                        <pubDate>Sun, 17 Oct 2004 21:40:33 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:2:40:33PM</guid>

                        </item>

                <item>

                        <description>&lt;a 
href=&quot;http://www.scripting.com/images/2001/09/15/usFlag.gif&quot; 
title=&quot;THINK!&quot;>&lt;img 
src=&quot;http://www.scripting.com/images/archiveScriptingCom/2004/05/31/think.gif&quot;
 height=&quot;59&quot; width=&quot;69&quot; border=&quot;0&quot; hspace=&quot;15&quot; 
vspace=&quot;15&quot; align=&quot;right&quot; alt=&quot;THINK!&quot;>&lt;/a>Watching 
&lt;a href=&quot;http://frist.senate.gov/&quot;&gt;Bill Frist&lt;/a&gt;, Senate 
Majority Leader, blame Kerry and Edwards for the failure of the Congress to pass tort 
reform. I thought I was hearing Bush do the same thing in the last debate. I wonder 
how many Americans know that Congress is controlled by the Republicans. They talk as 
if there were an adversarial relationship between the legislative and executive 
branches, which would lead people to draw the incorrect conclusion that the Democrats 
are responsible. I think today we got a preview of the final onslaught of ads the 
Republicans are going to run, and there won't be an opportunity to explain that the 
Democrats don't run Congress. Like the lie that Saddam Hussein was in league with 
Osama bin Laden, the Republicans don't mind if you draw the wrong conclusion, in fact, 
they'll help you do it. </description>

                        <pubDate>Sun, 17 Oct 2004 17:44:57 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:10:44:57AM</guid>

                        </item>

                <item>

                        <description>&lt;a 
href=&quot;http://www.nytimes.com/2004/10/17/magazine/17BUSH.html?ei=5088&amp;en=6a9ce1d022952b10&amp;ex=1255752000&amp;partner=rssnyt&amp;pagewanted=print&amp;position=&quot;&gt;NY
 Times Magazine&lt;/a&gt;, quoting a senior White House official, in 2002: &quot;We're 
an empire now, and when we act, we create our own reality.&quot;</description>

                        <pubDate>Sun, 17 Oct 2004 18:46:03 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:11:46:03AM</guid>

                        </item>

                <item>

                        <description>&lt;a 
href=&quot;http://www.bloggercon.org/2004/10/17#a1605&quot;&gt;Adam Curry&lt;/a&gt;: 
&quot;A lot of people have been questioning the use of licensed music in Podcasts and 
I too feel that the time has come to face any legal ramifications of this audio wave 
we're riding now, and not let it take us by surprise.&quot;</description>

                        <pubDate>Sun, 17 Oct 2004 17:24:24 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:10:24:24AM</guid>

                        </item>

                <item>

                        <description>&lt;a 
href=&quot;http://www.sfgate.com/cgi-bin/article.cgi?file=/chronicle/archive/2004/10/17/EDG8O98IQ01.DTL&quot;&gt;Mitch
 Kapor&lt;/a&gt;: &quot;We were never meant to have a highly centralized 
government.&quot;</description>

                        <pubDate>Sun, 17 Oct 2004 17:21:02 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:10:21:02AM</guid>

                        </item>

                <item>

                        <description>&lt;img 
src=&quot;http://images.scripting.com/archiveScriptingCom/2004/10/17/lessig.jpg&quot; 
width=&quot;45&quot; height=&quot;57&quot; border=&quot;0&quot; 
align=&quot;right&quot; hspace=&quot;15&quot; vspace=&quot;5&quot; alt=&quot;A picture 
named lessig.jpg&quot;&gt;Emailing with Larry Lessig today, he said something 
surprising about &lt;a href=&quot;http://creativecommons.org/&quot;&gt;Creative 
Commons&lt;/a&gt;. &quot;No author gives up his copyright when putting content under a 
CC license. A CC license is just permissions given up front. It rests upon a copyright 
(without the copyright, you couldn't impose the permissions). But the copyright owner 
holds the copyright, and just says, 'here's how you're free to use my work.'&quot; 
</description>

                        <pubDate>Sun, 17 Oct 2004 15:57:45 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:8:57:45AM</guid>

                        </item>

                <item>

                        <description>Doug Kaye &lt;a 
href=&quot;http://www.itconversations.com/shows/detail225.html&quot;&gt;interview&lt;/a&gt;
 with Adam Curry.</description>

                        <pubDate>Sun, 17 Oct 2004 12:15:27 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:5:15:27AM</guid>

                        </item>

                <item>

                        <description>&lt;a 
href=&quot;http://www.iol.co.za/index.php?set_id=1&amp;click_id=2969&quot;&gt;RSS news 
feeds&lt;/a&gt; from South Africa.</description>

                        <pubDate>Sun, 17 Oct 2004 14:21:26 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:7:21:26AM</guid>

                        <category>/Technology/Formats and Protocols/RSS</category>

                        </item>

                <item>

                        <description>&lt;a 
href=&quot;http://static.podcatch.com/manila/gems/un/eps.mp3&quot;&gt;This is a 
test&lt;/a&gt;. For the next sixty seconds this station will conduct a test of the 
Emergency Podcast System. </description>

                        <pubDate>Sun, 17 Oct 2004 17:36:38 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:10:36:38AM</guid>

                        <enclosure 
url="http://static.podcatch.com/manila/gems/un/eps.mp3" length="189455" 
type="audio/mpeg" />

                        </item>

                <item>

                        <description>&lt;a 
href=&quot;http://static.podcatch.com/manila/gems/un/anotherTestAudioBlogPost.mp3&quot;&gt;I
 got another&lt;/a&gt; test blog post. An audio test blog post. Pay no attention to 
the man behind the curtain.</description>

                        <pubDate>Sun, 17 Oct 2004 19:11:14 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:12:11:14PM</guid>

                        <enclosure 
url="http://static.podcatch.com/manila/gems/un/anotherTestAudioBlogPost.mp3" 
length="106423" type="audio/mpeg" />

                        </item>

                <item>

                        <description>&lt;a 
href=&quot;http://www.mediainfo.com/eandp/news/article_display.jsp?vnu_content_id=1000671941&quot;&gt;Editor
 &amp; Publisher&lt;/a&gt; has a list of presidential endorsements.</description>

                        <pubDate>Sun, 17 Oct 2004 12:42:24 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:5:42:24AM</guid>

                        </item>

                <item>

                        <description>&lt;a 
href=&quot;http://www.undergroundclips.com/undergroundclips/2004/10/richard_clark_o.html&quot;&gt;Undergroundclips&lt;/a&gt;
 has the 60 Minutes interview with Richard Clark.</description>

                        <pubDate>Sun, 17 Oct 2004 12:28:53 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:5:28:53AM</guid>

                        </item>

                <item>

                        <description>&lt;a 
href=&quot;http://www.nytimes.com/2004/10/17/arts/17rich.html?ex=1255752000&amp;en=ca7f76fa80642517&amp;ei=5088&amp;partner=rssnyt&quot;&gt;Frank
 Rich&lt;/a&gt;: &quot;Like the Nixon administration before it, the current White 
House has kneecapped with impunity any news organization that challenges its 
message.&quot;</description>

                        <pubDate>Sun, 17 Oct 2004 14:16:02 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:7:16:02AM</guid>

                        </item>

                <item>

                        <description>The Boston Globe &lt;a 
href=&quot;http://www.boston.com/business/articles/2004/10/17/harvards12_billion_man/?rss_id=Boston%20Globe%20--%20Business%20News&quot;&gt;profiles&lt;/a&gt;
 Jack Meyer, the investment banker who's in charge of Harvard's $22 billion 
endowment.</description>

                        <pubDate>Sun, 17 Oct 2004 12:30:44 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:5:30:44AM</guid>

                        </item>

                <item>

                        <description>&lt;img 
src=&quot;http://images.scripting.com/archiveScriptingCom/2004/10/17/dubya.jpg&quot; 
width=&quot;45&quot; height=&quot;61&quot; border=&quot;0&quot; 
align=&quot;right&quot; hspace=&quot;15&quot; vspace=&quot;5&quot; alt=&quot;A picture 
named dubya.jpg&quot;&gt;In a speech yesterday Bush said we will not have an 
all-volunteer army. A few in the audience shouted, and he &lt;a 
href=&quot;http://www.turkishpress.com/turkishpress/news.asp?ID=30912&quot;&gt;flipped&lt;/a&gt;
 it around. They chuckle when Bush makes a mistake, but what if Kerry had said it? Do 
you think the Republicans would have mocked him? Yeah, I think so. I think the Dems 
should run that flip-flop as an ad. Fair is fair. And unfair is fair in this 
election.</description>

                        <pubDate>Sun, 17 Oct 2004 12:03:56 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:5:03:56AM</guid>

                        </item>

                <item>

                        <description>&lt;a 
href=&quot;http://jeremy.zawodny.com/blog/archives/002826.html&quot;&gt;Jeremy 
Zawodny&lt;/a&gt;, who works at Yahoo, says MSNBC ripped them off. </description>

                        <pubDate>Sun, 17 Oct 2004 12:01:55 GMT</pubDate>

                        
<guid>http://archive.scripting.com/2004/10/17#When:5:01:55AM</guid>

                        </item>

                </channel>

        </rss>

  
  
  
  1.1                  
jakarta-commons-sandbox/feedparser/tests/locale/rss-zh-on-channel-element.xml
  
  Index: rss-zh-on-channel-element.xml
  ===================================================================
  <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" 
xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/" 
xmlns:wfw="http://wellformedweb.org/CommentAPI/" 
xmlns:slash="http://purl.org/rss/1.0/modules/slash/"><channel><title>beiqiao的专栏</title><link>http://blog.csdn.net/beiqiao/</link><description
 /><dc:language>zh-CHS</dc:language><generator>.Text Version 
0.958.2004.2001</generator><item><dc:creator>beiqiao</dc:creator><title>修复IE</title><link>http://blog.csdn.net/beiqiao/archive/2004/10/12/132900.aspx</link><pubDate>Tue,
 12 Oct 2004 00:43:00 
GMT</pubDate><guid>http://blog.csdn.net/beiqiao/archive/2004/10/12/132900.aspx</guid><wfw:comment>http://blog.csdn.net/beiqiao/comments/132900.aspx</wfw:comment><comments>http://blog.csdn.net/beiqiao/archive/2004/10/12/132900.aspx#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://blog.csdn.net/beiqiao/comments/commentRss/132900.aspx</wfw:commentRss><trackback:ping>http://blog.csdn.net/beiqiao/services/trackbacks/132900.aspx</trackback:ping><description>发现中毒了,每次启动IE,会打开本地一个sp.html文件。这个文件放在windows临时目录下,即使被删除,IE启动后,又被生成。同时注册表中HKEY_LOCAL_MECHINE\Microsoft\Internet
 Explorer\main\search bar、search page;HKEY_LOCAL_MECHINE\Microsoft\Internet 
Explorer\search键值被修改成指向本地的sp.html文件,修改掉这些键值后,打开IE,又被改回来了。&lt;img
 src ="http://blog.csdn.net/beiqiao/aggbug/132900.aspx" width = "1" height = "1" 
/&gt;</description></item><item><dc:creator>beiqiao</dc:creator><title>Linux相关命令</title><link>http://blog.csdn.net/beiqiao/archive/2004/10/11/131800.aspx</link><pubDate>Mon,
 11 Oct 2004 10:55:00 
GMT</pubDate><guid>http://blog.csdn.net/beiqiao/archive/2004/10/11/131800.aspx</guid><wfw:comment>http://blog.csdn.net/beiqiao/comments/131800.aspx</wfw:comment><comments>http://blog.csdn.net/beiqiao/archive/2004/10/11/131800.aspx#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://blog.csdn.net/beiqiao/comments/commentRss/131800.aspx</wfw:commentRss><trackback:ping>http://blog.csdn.net/beiqiao/services/trackbacks/131800.aspx</trackback:ping><description>列出常用Linux命令以备查询&lt;img
 src ="http://blog.csdn.net/beiqiao/aggbug/131800.aspx" width = "1" height = "1" 
/&gt;</description></item><item><dc:creator>beiqiao</dc:creator><title>使用Jmeter对Mysql进行压力测试无法执行多条sql语句问题</title><link>http://blog.csdn.net/beiqiao/archive/2004/10/10/130966.aspx</link><pubDate>Sun,
 10 Oct 2004 16:49:00 
GMT</pubDate><guid>http://blog.csdn.net/beiqiao/archive/2004/10/10/130966.aspx</guid><wfw:comment>http://blog.csdn.net/beiqiao/comments/130966.aspx</wfw:comment><comments>http://blog.csdn.net/beiqiao/archive/2004/10/10/130966.aspx#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://blog.csdn.net/beiqiao/comments/commentRss/130966.aspx</wfw:commentRss><trackback:ping>http://blog.csdn.net/beiqiao/services/trackbacks/130966.aspx</trackback:ping><description>使用Jmeter对Mysql进行压力测试无法执行多条sql语句问题&lt;img
 src ="http://blog.csdn.net/beiqiao/aggbug/130966.aspx" width = "1" height = "1" 
/&gt;</description></item></channel></rss>
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to