http://www.mediawiki.org/wiki/Special:Code/MediaWiki/83808

Revision: 83808
Author:   bawolff
Date:     2011-03-13 09:09:39 +0000 (Sun, 13 Mar 2011)
Log Message:
-----------
Cleanup parts of GNSM, especially parts relating to the SitemapFeed class and 
FeedSMItem.
*Make it so fatal errors cannot be generated by using an invalid feed name
*Make it so no more fatal if feed=sitemap on some other page. (This part is 
somewhat borked though, as it assumes a feed item title is a page title, which
is true on, say, Special:Recentchanges, but not ?action=history. It's not expected
that anyone would want to do that; maybe it could output a (500) error if not 
given a FeedSMItem?)
*Remove usenamespace, usecurid, and supresserrors parameters. They're carried 
over from DPL, and useless here.
*Remove weird stuff with $wgLocalTimezone (Don't know what the intention with 
it was).
*Have sensible errors with 500 status code, instead of random text. Make having 
no results not be an error,
since it could happen, and saying "Error no results" is not machine readable, 
unlike an XML document with
no entries (which might not be valid according to the schema, but is certainly 
better than the alternative).
*Generally fix up which parameters are needed for the feed. Reduce code 
duplication with ChannelFeed class,
and unify the Sitemap and other feed types somewhat.
*Remove feedItemAuthor() and feedItemDesc(), which apparently are dead code
*Don't make the talk page a comment page advertised by atom. On en wikinews, 
the appropriate page is the comments namespace.
Look into how that parameter is actually used, and potentially re-add later.
*Make the atom feed component return the article as the description. Might be 
slightly inefficient (?) but so is the rest
of the extension ;). Certainly better than returning nothing
*Remove priority feature. Doesn't really make sense since we have no way of 
figuring out what has a high priority.

Modified Paths:
--------------
    trunk/extensions/GoogleNewsSitemap/FeedSMItem.php
    trunk/extensions/GoogleNewsSitemap/GoogleNewsSitemap_body.php
    trunk/extensions/GoogleNewsSitemap/SitemapFeed.php

Modified: trunk/extensions/GoogleNewsSitemap/FeedSMItem.php
===================================================================
--- trunk/extensions/GoogleNewsSitemap/FeedSMItem.php   2011-03-13 07:34:37 UTC 
(rev 83807)
+++ trunk/extensions/GoogleNewsSitemap/FeedSMItem.php   2011-03-13 09:09:39 UTC 
(rev 83808)
@@ -6,48 +6,70 @@
  **
  * Base class for basic SiteMap support, for building url containers.
  **/
-class FeedSMItem {
-       /**
-        * Var string
-        **/
-       var $url = '', $pubDate = '', $keywords = '', $lastMod = '', $priority 
= '';
+class FeedSMItem extends FeedItem {
 
-       function __construct( $url, $pubDate, $keywords = '', $lastMod = '', 
$priority = '' ) {
-               $this->url = $url;
-               $this->pubDate = $pubDate;
+       private $keywords = '';
+       private $title; // Title object, not string.
+
+       function __construct( $title, $pubDate, $keywords = '' ) {
+               parent::__construct( $title->getText(), '' /* description */, 
$title->getFullUrl(), $pubDate );
+               $this->title = $title;
                $this->keywords = $keywords;
-               $this->lastMod = $lastMod;
-               $this->priority = $priority;
        }
 
-       public function xmlEncode( $string ) {
-               $string = str_replace( "\r\n", "\n", $string );
-               $string = preg_replace( '/[\x00-\x08\x0b\x0c\x0e-\x1f]/', '', 
$string );
-               return htmlspecialchars( $string );
+       /**
+        * Convert a FeedItem to an FeedSMItem.
+        * This is to make sitemap feed get along with normal MediaWiki feeds.
+        * @param FeedItem Original item.
+        * @return FeedSMItem Converted item.
+        */
+       static function newFromFeedItem( FeedItem $item ) {
+               // FIXME: This is borked (esp. on history), but better than a 
fatal (not by much).
+               // maybe try and get title from url?
+               $title = Title::newFromText( $item->getTitle() );
+               if ( !$title ) {
+                       throw new MWException( "Error getting title object from 
string in FeedItem." );
+               }
+               $date = $item->getDate();
+               return new FeedSMItem( $title, $date );
        }
 
-       public function getUrl() {
-               return $this->url;
-       }
-
-       public function getPriority() {
-               return $this->priority;
-       }
-
        public function getLastMod() {
-               return $this->lastMod;
+               return $this->title->getTouched();
        }
 
-       public function getKeywords () {
+       public function getKeywords() {
                return $this->xmlEncode( $this->keywords );
        }
 
-       public function getPubDate() {
-               return $this->pubDate;
+       /**
+        * Overrides parent class. Meant to be used in rss feed.
+        * Currently return the article, its debatable if thats a good idea
+        * or not, but not sure of what better to do. Could regex the wikitext
+        * and try to return the first paragraph, but thats iffy.
+        *
+        * Note, this is only called by the atom/rss feed output, not by
+        * the sitemap output.
+        * @return String
+        */
+       public function getDescription() {
+               // This is probably rather inefficient to do for several pages
+               // but not much worse than the rest of this extension.
+               $req = new FauxRequest( array(
+                       'action' => 'parse',
+                       'page' => $this->title->getPrefixedDBKey(),
+                       'prop' => 'text',
+               ) );
+               $main = new ApiMain( $req );
+               $main->execute();
+               $data = $main->getResultData();
+               if ( isset( $data['parse']['text']['*'] ) ) {
+                       return $this->xmlEncode(
+                               $data['parse']['text']['*']
+                       );
+               } else {
+                       return '';
+               }
        }
+}
 
-       function formatTime( $ts ) {
-               // need to use RFC 822 time format at least for rss2.0
-               return gmdate( 'Y-m-d\TH:i:s', wfTimestamp( TS_UNIX, $ts ) );
-       }
-}
\ No newline at end of file

Modified: trunk/extensions/GoogleNewsSitemap/GoogleNewsSitemap_body.php
===================================================================
--- trunk/extensions/GoogleNewsSitemap/GoogleNewsSitemap_body.php       
2011-03-13 07:34:37 UTC (rev 83807)
+++ trunk/extensions/GoogleNewsSitemap/GoogleNewsSitemap_body.php       
2011-03-13 09:09:39 UTC (rev 83808)
@@ -20,26 +20,10 @@
  *       * redirects = string ; default = exclude
  *       * stablepages = string ; default = null
  *       * qualitypages = string ; default = null
- *       * feed = string ; default = atom
- *     usenamespace = bool ; default = false
- *     usecurid = bool ; default = false
- *     suppresserrors = bool ; default = false
+ *       * feed = string ; default = sitemap
  **/
 
 class GoogleNewsSitemap extends SpecialPage {
-       /**
-        * FIXME: Some of this might need a config eventually
-        * @var string
-        **/
-       var $Title = '';
-       var $Description = '';
-       var $Url = '';
-       var $Date = '';
-       var $Author = '';
-       var $pubDate = '';
-       var $keywords = '';
-       var $lastMod = '';
-       var $priority = '';
 
        /**
         * Script default values - correctly spelt, naming standard.
@@ -66,37 +50,29 @@
         * main()
         **/
        public function execute( $par ) {
-               global $wgUser, $wgLang, $wgContLang, $wgRequest, $wgOut,
-                       $wgSitename, $wgServer, $wgScriptPath, $wgFeedClasses,
-                       $wgLocaltimezone;
+               global $wgContLang, $wgSitename, $wgFeedClasses, 
$wgLanguageCode;
 
-               // Not sure how clean $wgLocaltimezone is
-               // In fact, it's default setting is null...
-               if ( null == $wgLocaltimezone ) {
-                       $wgLocaltimezone = date_default_timezone_get();
-               }
-               date_default_timezone_set( $wgLocaltimezone );
-               // $url = __FILE__;
-
                $this->unload_params(); // populates this->params as a side 
effect
 
                // if there's an error parsing the params, bail out and return
                if ( isset( $this->params['error'] ) ) {
-                       if ( false == $this->params['suppressErrors'] ) {
-                               $wgOut->disable();
-                               echo $this->params['error'];
-                       }
+                       wfHttpError( 500, "Internal Server Error", 
$this->params['error'] );
                        return;
                }
 
-
-               $feed = new $wgFeedClasses[ $this->params['feed'] ](
-                               $wgSitename,
-                               $wgSitename . ' ' . $this->params['feed'] . ' 
feed',
-                               $wgServer . $wgScriptPath,
-                               date( DATE_ATOM ),
-                               $wgSitename
+               // Check to make sure that feed type is supported.
+               if ( FeedUtils::checkFeedOutput( $this->params['feed'] ) ) {
+                       // TODO: should feed title be a message.
+                       $feed = new $wgFeedClasses[ $this->params['feed'] ](
+                               $wgSitename . " [$wgLanguageCode] "
+                                       . $wgContLang->uc( 
$this->params['feed'] ) . ' feed',
+                               wfMsgExt( 'tagline', 'parsemag' ),
+                               Title::newMainPage()->getFullUrl()
                        );
+               } else {
+                       // Can't really do anything if wrong feed type.
+                       return;
+               }
 
                $res = $this->doQuery();
 
@@ -109,37 +85,15 @@
                                return;
                        }
 
-                       if ( 'sitemap' == $this->params['feed'] ) {
+                       // Fixme: Under what circumstance would cl_timestamp 
not be set?
+                       // possibly worth an exception if that happens.
+                       $this->pubDate = isset( $row->cl_timestamp ) ? 
$row->cl_timestamp : wfTimestampNow();
 
-                               $this->pubDate = isset( $row->cl_timestamp ) ? 
$row->cl_timestamp : date( DATE_ATOM );
-                               $feedArticle = new Article( $title );
-
-                               $feedItem = new FeedSMItem(
-                                  trim( $title->getFullURL() ),
-                                  wfTimeStamp( TS_ISO_8601, $this->pubDate ),
-                                  $this->getKeywords( $title ),
-                                  wfTimeStamp( TS_ISO_8601, 
$feedArticle->getTouched() ),
-                                  $feed->getPriority( $this->priority )
-                               );
-
-                       } elseif ( ( 'atom' == $this->params['feed'] ) || ( 
'rss' == $this->params['feed'] ) ) {
-
-                               $this->Date = isset( $row->cl_timestamp ) ? 
$row->cl_timestamp : date( DATE_ATOM );
-                               if ( isset( $row->comment ) ) {
-                                       $comments = htmlspecialchars( 
$row->comment );
-                               } else {
-                                       $talkpage = $title->getTalkPage();
-                                       $comments = $talkpage->getFullURL();
-                               }
-                               $titleText = ( true === 
$this->params['nameSpace'] ) ? $title->getPrefixedText() : $title->getText();
-                               $feedItem = new FeedItem(
-                                                               $titleText,
-                                                               
$this->feedItemDesc( $row ),
-                                                               
$title->getFullURL(),
-                                                               $this->Date,
-                                                               
$this->feedItemAuthor( $row ),
-                                                               $comments );
-                       }
+                       $feedItem = new FeedSMItem(
+                               $title,
+                               $this->pubDate,
+                               $this->getKeywords( $title )
+                       );
                        $feed->outItem( $feedItem );
 
                } // end while fetchobject
@@ -285,9 +239,6 @@
                $this->params['redirects'] = $wgRequest->getVal( 'redirects', 
'exclude' );
                $this->params['stable'] = $wgRequest->getVal( 'stable', 'only' 
);
                $this->params['quality'] = $wgRequest->getVal( 'qualitypages', 
'only' );
-               $this->params['suppressErrors'] = $wgRequest->getBool( 
'supresserrors', false );
-               $this->params['useNameSpace'] = $wgRequest->getBool( 
'usenamespace', false );
-               $this->params['useCurId'] = $wgRequest->getBool( 'usecurid', 
false );
                $this->params['feed'] = $wgRequest->getVal( 'feed', 'sitemap' );
 
                $this->params['catCount'] = count( $this->categories );
@@ -317,14 +268,6 @@
 
        }
 
-       function feedItemAuthor( $row ) {
-               return isset( $row->user_text ) ? $row->user_text : 'Wikinews';
-       }
-
-       function feedItemDesc( $row ) {
-               return isset( $row->comment ) ? htmlspecialchars( $row->comment 
) : '';
-       }
-
        /**
         * @param Title $title
         * @return string

Modified: trunk/extensions/GoogleNewsSitemap/SitemapFeed.php
===================================================================
--- trunk/extensions/GoogleNewsSitemap/SitemapFeed.php  2011-03-13 07:34:37 UTC 
(rev 83807)
+++ trunk/extensions/GoogleNewsSitemap/SitemapFeed.php  2011-03-13 09:09:39 UTC 
(rev 83808)
@@ -1,36 +1,29 @@
 <?php
 if ( !defined( 'MEDIAWIKI' ) ) die();
 
-class SitemapFeed extends FeedSMItem {
+class SitemapFeed extends ChannelFeed {
        private $writer;
 
        function __construct() {
-               global $wgOut;
                $this->writer = new XMLWriter();
-               $wgOut->disable();
        }
 
+       function contentType() {
+               return 'application/xml';
+       }
+
        /**
-        * Output feed headers
+        * Output feed headers.
         */
        function outHeader() {
-               global $wgOut,  $wgRequest;
+               $this->httpHeaders();
 
-               // FIXME: Why can't we just pick one mime type and always send 
that?
-               $ctype = $wgRequest->getVal( 'ctype', 'application/xml' );
-               $allowedctypes = array( 'application/xml', 'text/xml', 
'application/rss+xml', 'application/atom+xml' );
-               $mimetype = in_array( $ctype, $allowedctypes ) ? $ctype : 
'application/xml';
-
-               header( "Content-type: $mimetype; charset=UTF-8" );
-               $wgOut->sendCacheControl();
-
                $this->writer->openURI( 'php://output' );
                $this->writer->setIndent( true );
                $this->writer->startDocument( "1.0", "UTF-8" );
                $this->writer->startElement( "urlset" );
                $this->writer->writeAttribute( "xmlns", 
"http://www.sitemaps.org/schemas/sitemap/0.9"; );
                $this->writer->writeAttribute( "xmlns:news", 
"http://www.google.com/schemas/sitemap-news/0.9"; );
-               $this->writer->flush();
        }
 
        /**
@@ -38,30 +31,42 @@
         * @param FeedSMItem $item to be output
         */
        function outItem( $item ) {
+
+               if ( !( $item instanceof FeedItem ) ) {
+                       throw new MWException( "Requires a FeedItem or 
subclass." );
+               }
+               if ( !( $item instanceof FeedSMItem ) ) {
+                       $item = FeedSMItem::newFromFeedItem( $item );
+               }
+
                $this->writer->startElement( "url" );
+
                $this->writer->startElement( "loc" );
                $this->writer->text( $item->getUrl() );
                $this->writer->endElement();
+
                $this->writer->startElement( "news:news"; );
+
                $this->writer->startElement( "news:publication_date"; );
-               $this->writer->text( $item->getPubDate() );
+               $this->writer->text( wfTimestamp( TS_ISO_8601, $item->getDate() 
) );
                $this->writer->endElement();
+
+               $this->writer->startElement( "news:title"; );
+               $this->writer->text( $item->getTitle() );
+               $this->writer->endElement();
+
                if ( $item->getKeywords() ) {
                        $this->writer->startElement( "news:keywords"; );
                        $this->writer->text( $item->getKeywords() );
                        $this->writer->endElement();
                }
+
                $this->writer->endElement(); // end news:news
                if ( $item->getLastMod() ) {
                        $this->writer->startElement( "lastmod" );
-                       $this->writer->text( $item->getLastMod() );
+                       $this->writer->text( wfTimestamp( TS_ISO_8601, 
$item->getLastMod() ) );
                        $this->writer->endElement();
                }
-               if ( $item->getPriority() ) {
-                       $this->writer->startElement( "priority" );
-                       $this->writer->text( $item->getPriority() );
-                       $this->writer->endElement();
-               }
                $this->writer->endElement(); // end url
        }
 


_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to