http://www.mediawiki.org/wiki/Special:Code/MediaWiki/83808
Revision: 83808 Author: bawolff Date: 2011-03-13 09:09:39 +0000 (Sun, 13 Mar 2011) Log Message: ----------- Clean up parts of GNSM, especially parts relating to the SitemapFeed class and FeedSMItem. *Make it so fatal errors cannot be generated by using an invalid feed name *Make it so no more fatal if feed=sitemap on some other page (This part is somewhat borked though, as it assumes a feed item title is a page title, which is true on say Special:Recentchanges, but not ?action=history. It's not expected that anyone would want to do that, maybe could output (500) error if not given a FeedSMItem(?)) *Remove usenamespace, usecurid, and supresserrors parameters. They're carried over from DPL, and useless here. *Remove weird stuff with $wgLocaltimezone (Don't know what the intention with it was). *Have sensible errors with 500 status code, instead of random text. Make having no results not be an error since it could happen, and saying "Error no results" is not machine readable, unlike an XML document with no entries (which might not be valid according to the schema, but certainly better than the alternative) *Generally fix up which parameters are needed for the feed. Reduce code duplication with ChannelFeed class, and unify the Sitemap and other feed types somewhat. *Remove feedItemAuthor() and feedItemDesc() that apparently are dead code *Don't make the talk page a comment page advertised by atom. On en wikinews, the appropriate page is the comments namespace. Look into how that parameter is actually used, and potentially re-add later. *Make the atom feed component return the article as the description. Might be slightly inefficient (?) but so is the rest of the extension ;). Certainly better than returning nothing *Remove priority feature. Doesn't really make sense since we have no way of figuring out what has a high priority. 
Modified Paths: -------------- trunk/extensions/GoogleNewsSitemap/FeedSMItem.php trunk/extensions/GoogleNewsSitemap/GoogleNewsSitemap_body.php trunk/extensions/GoogleNewsSitemap/SitemapFeed.php Modified: trunk/extensions/GoogleNewsSitemap/FeedSMItem.php =================================================================== --- trunk/extensions/GoogleNewsSitemap/FeedSMItem.php 2011-03-13 07:34:37 UTC (rev 83807) +++ trunk/extensions/GoogleNewsSitemap/FeedSMItem.php 2011-03-13 09:09:39 UTC (rev 83808) @@ -6,48 +6,70 @@ ** * Base class for basic SiteMap support, for building url containers. **/ -class FeedSMItem { - /** - * Var string - **/ - var $url = '', $pubDate = '', $keywords = '', $lastMod = '', $priority = ''; +class FeedSMItem extends FeedItem { - function __construct( $url, $pubDate, $keywords = '', $lastMod = '', $priority = '' ) { - $this->url = $url; - $this->pubDate = $pubDate; + private $keywords = ''; + private $title; // Title object, not string. + + function __construct( $title, $pubDate, $keywords = '' ) { + parent::__construct( $title->getText(), '' /* description */, $title->getFullUrl(), $pubDate ); + $this->title = $title; $this->keywords = $keywords; - $this->lastMod = $lastMod; - $this->priority = $priority; } - public function xmlEncode( $string ) { - $string = str_replace( "\r\n", "\n", $string ); - $string = preg_replace( '/[\x00-\x08\x0b\x0c\x0e-\x1f]/', '', $string ); - return htmlspecialchars( $string ); + /** + * Convert a FeedItem to an FeedSMItem. + * This is to make sitemap feed get along with normal MediaWiki feeds. + * @param FeedItem Original item. + * @return FeedSMItem Converted item. + */ + static function newFromFeedItem( FeedItem $item ) { + // FIXME: This is borked (esp. on history), but better than a fatal (not by much). + // maybe try and get title from url? + $title = Title::newFromText( $item->getTitle() ); + if ( !$title ) { + throw new MWException( "Error getting title object from string in FeedItem." 
); + } + $date = $item->getDate(); + return new FeedSMItem( $title, $date ); } - public function getUrl() { - return $this->url; - } - - public function getPriority() { - return $this->priority; - } - public function getLastMod() { - return $this->lastMod; + return $this->title->getTouched(); } - public function getKeywords () { + public function getKeywords() { return $this->xmlEncode( $this->keywords ); } - public function getPubDate() { - return $this->pubDate; + /** + * Overrides parent class. Meant to be used in rss feed. + * Currently return the article, its debatable if thats a good idea + * or not, but not sure of what better to do. Could regex the wikitext + * and try to return the first paragraph, but thats iffy. + * + * Note, this is only called by the atom/rss feed output, not by + * the sitemap output. + * @return String + */ + public function getDescription() { + // This is probably rather inefficient to do for several pages + // but not much worse than the rest of this extension. 
+ $req = new FauxRequest( array( + 'action' => 'parse', + 'page' => $this->title->getPrefixedDBKey(), + 'prop' => 'text', + ) ); + $main = new ApiMain( $req ); + $main->execute(); + $data = $main->getResultData(); + if ( isset( $data['parse']['text']['*'] ) ) { + return $this->xmlEncode( + $data['parse']['text']['*'] + ); + } else { + return ''; + } } +} - function formatTime( $ts ) { - // need to use RFC 822 time format at least for rss2.0 - return gmdate( 'Y-m-d\TH:i:s', wfTimestamp( TS_UNIX, $ts ) ); - } -} \ No newline at end of file Modified: trunk/extensions/GoogleNewsSitemap/GoogleNewsSitemap_body.php =================================================================== --- trunk/extensions/GoogleNewsSitemap/GoogleNewsSitemap_body.php 2011-03-13 07:34:37 UTC (rev 83807) +++ trunk/extensions/GoogleNewsSitemap/GoogleNewsSitemap_body.php 2011-03-13 09:09:39 UTC (rev 83808) @@ -20,26 +20,10 @@ * * redirects = string ; default = exclude * * stablepages = string ; default = null * * qualitypages = string ; default = null - * * feed = string ; default = atom - * usenamespace = bool ; default = false - * usecurid = bool ; default = false - * suppresserrors = bool ; default = false + * * feed = string ; default = sitemap **/ class GoogleNewsSitemap extends SpecialPage { - /** - * FIXME: Some of this might need a config eventually - * @var string - **/ - var $Title = ''; - var $Description = ''; - var $Url = ''; - var $Date = ''; - var $Author = ''; - var $pubDate = ''; - var $keywords = ''; - var $lastMod = ''; - var $priority = ''; /** * Script default values - correctly spelt, naming standard. @@ -66,37 +50,29 @@ * main() **/ public function execute( $par ) { - global $wgUser, $wgLang, $wgContLang, $wgRequest, $wgOut, - $wgSitename, $wgServer, $wgScriptPath, $wgFeedClasses, - $wgLocaltimezone; + global $wgContLang, $wgSitename, $wgFeedClasses, $wgLanguageCode; - // Not sure how clean $wgLocaltimezone is - // In fact, it's default setting is null... 
- if ( null == $wgLocaltimezone ) { - $wgLocaltimezone = date_default_timezone_get(); - } - date_default_timezone_set( $wgLocaltimezone ); - // $url = __FILE__; - $this->unload_params(); // populates this->params as a side effect // if there's an error parsing the params, bail out and return if ( isset( $this->params['error'] ) ) { - if ( false == $this->params['suppressErrors'] ) { - $wgOut->disable(); - echo $this->params['error']; - } + wfHttpError( 500, "Internal Server Error", $this->params['error'] ); return; } - - $feed = new $wgFeedClasses[ $this->params['feed'] ]( - $wgSitename, - $wgSitename . ' ' . $this->params['feed'] . ' feed', - $wgServer . $wgScriptPath, - date( DATE_ATOM ), - $wgSitename + // Check to make sure that feed type is supported. + if ( FeedUtils::checkFeedOutput( $this->params['feed'] ) ) { + // TODO: should feed title be a message. + $feed = new $wgFeedClasses[ $this->params['feed'] ]( + $wgSitename . " [$wgLanguageCode] " + . $wgContLang->uc( $this->params['feed'] ) . ' feed', + wfMsgExt( 'tagline', 'parsemag' ), + Title::newMainPage()->getFullUrl() ); + } else { + // Can't really do anything if wrong feed type. + return; + } $res = $this->doQuery(); @@ -109,37 +85,15 @@ return; } - if ( 'sitemap' == $this->params['feed'] ) { + // Fixme: Under what circumstance would cl_timestamp not be set? + // possibly worth an exception if that happens. + $this->pubDate = isset( $row->cl_timestamp ) ? $row->cl_timestamp : wfTimestampNow(); - $this->pubDate = isset( $row->cl_timestamp ) ? $row->cl_timestamp : date( DATE_ATOM ); - $feedArticle = new Article( $title ); - - $feedItem = new FeedSMItem( - trim( $title->getFullURL() ), - wfTimeStamp( TS_ISO_8601, $this->pubDate ), - $this->getKeywords( $title ), - wfTimeStamp( TS_ISO_8601, $feedArticle->getTouched() ), - $feed->getPriority( $this->priority ) - ); - - } elseif ( ( 'atom' == $this->params['feed'] ) || ( 'rss' == $this->params['feed'] ) ) { - - $this->Date = isset( $row->cl_timestamp ) ? 
$row->cl_timestamp : date( DATE_ATOM ); - if ( isset( $row->comment ) ) { - $comments = htmlspecialchars( $row->comment ); - } else { - $talkpage = $title->getTalkPage(); - $comments = $talkpage->getFullURL(); - } - $titleText = ( true === $this->params['nameSpace'] ) ? $title->getPrefixedText() : $title->getText(); - $feedItem = new FeedItem( - $titleText, - $this->feedItemDesc( $row ), - $title->getFullURL(), - $this->Date, - $this->feedItemAuthor( $row ), - $comments ); - } + $feedItem = new FeedSMItem( + $title, + $this->pubDate, + $this->getKeywords( $title ) + ); $feed->outItem( $feedItem ); } // end while fetchobject @@ -285,9 +239,6 @@ $this->params['redirects'] = $wgRequest->getVal( 'redirects', 'exclude' ); $this->params['stable'] = $wgRequest->getVal( 'stable', 'only' ); $this->params['quality'] = $wgRequest->getVal( 'qualitypages', 'only' ); - $this->params['suppressErrors'] = $wgRequest->getBool( 'supresserrors', false ); - $this->params['useNameSpace'] = $wgRequest->getBool( 'usenamespace', false ); - $this->params['useCurId'] = $wgRequest->getBool( 'usecurid', false ); $this->params['feed'] = $wgRequest->getVal( 'feed', 'sitemap' ); $this->params['catCount'] = count( $this->categories ); @@ -317,14 +268,6 @@ } - function feedItemAuthor( $row ) { - return isset( $row->user_text ) ? $row->user_text : 'Wikinews'; - } - - function feedItemDesc( $row ) { - return isset( $row->comment ) ? 
htmlspecialchars( $row->comment ) : ''; - } - /** * @param Title $title * @return string Modified: trunk/extensions/GoogleNewsSitemap/SitemapFeed.php =================================================================== --- trunk/extensions/GoogleNewsSitemap/SitemapFeed.php 2011-03-13 07:34:37 UTC (rev 83807) +++ trunk/extensions/GoogleNewsSitemap/SitemapFeed.php 2011-03-13 09:09:39 UTC (rev 83808) @@ -1,36 +1,29 @@ <?php if ( !defined( 'MEDIAWIKI' ) ) die(); -class SitemapFeed extends FeedSMItem { +class SitemapFeed extends ChannelFeed { private $writer; function __construct() { - global $wgOut; $this->writer = new XMLWriter(); - $wgOut->disable(); } + function contentType() { + return 'application/xml'; + } + /** - * Output feed headers + * Output feed headers. */ function outHeader() { - global $wgOut, $wgRequest; + $this->httpHeaders(); - // FIXME: Why can't we just pick one mime type and always send that? - $ctype = $wgRequest->getVal( 'ctype', 'application/xml' ); - $allowedctypes = array( 'application/xml', 'text/xml', 'application/rss+xml', 'application/atom+xml' ); - $mimetype = in_array( $ctype, $allowedctypes ) ? $ctype : 'application/xml'; - - header( "Content-type: $mimetype; charset=UTF-8" ); - $wgOut->sendCacheControl(); - $this->writer->openURI( 'php://output' ); $this->writer->setIndent( true ); $this->writer->startDocument( "1.0", "UTF-8" ); $this->writer->startElement( "urlset" ); $this->writer->writeAttribute( "xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9" ); $this->writer->writeAttribute( "xmlns:news", "http://www.google.com/schemas/sitemap-news/0.9" ); - $this->writer->flush(); } /** @@ -38,30 +31,42 @@ * @param FeedSMItem $item to be output */ function outItem( $item ) { + + if ( !( $item instanceof FeedItem ) ) { + throw new MWException( "Requires a FeedItem or subclass." 
); + } + if ( !( $item instanceof FeedSMItem ) ) { + $item = FeedSMItem::newFromFeedItem( $item ); + } + $this->writer->startElement( "url" ); + $this->writer->startElement( "loc" ); $this->writer->text( $item->getUrl() ); $this->writer->endElement(); + $this->writer->startElement( "news:news" ); + $this->writer->startElement( "news:publication_date" ); - $this->writer->text( $item->getPubDate() ); + $this->writer->text( wfTimestamp( TS_ISO_8601, $item->getDate() ) ); $this->writer->endElement(); + + $this->writer->startElement( "news:title" ); + $this->writer->text( $item->getTitle() ); + $this->writer->endElement(); + if ( $item->getKeywords() ) { $this->writer->startElement( "news:keywords" ); $this->writer->text( $item->getKeywords() ); $this->writer->endElement(); } + $this->writer->endElement(); // end news:news if ( $item->getLastMod() ) { $this->writer->startElement( "lastmod" ); - $this->writer->text( $item->getLastMod() ); + $this->writer->text( wfTimestamp( TS_ISO_8601, $item->getLastMod() ) ); $this->writer->endElement(); } - if ( $item->getPriority() ) { - $this->writer->startElement( "priority" ); - $this->writer->text( $item->getPriority() ); - $this->writer->endElement(); - } $this->writer->endElement(); // end url } _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs