https://www.mediawiki.org/wiki/Special:Code/MediaWiki/114161
Revision: 114161 Author: maxsem Date: 2012-03-19 18:17:29 +0000 (Mon, 19 Mar 2012) Log Message: ----------- Follow-up r114129: * Fixed leftoffs from excerpts --> extracts rename * ...including broken OpenSearchXml hook handler * Split exlength to exchars and exsentences for better control over stuff being returned Modified Paths: -------------- trunk/extensions/MobileFrontend/MobileFrontend.php trunk/extensions/MobileFrontend/api/ApiQueryExtracts.php Modified: trunk/extensions/MobileFrontend/MobileFrontend.php =================================================================== --- trunk/extensions/MobileFrontend/MobileFrontend.php 2012-03-19 18:09:40 UTC (rev 114160) +++ trunk/extensions/MobileFrontend/MobileFrontend.php 2012-03-19 18:17:29 UTC (rev 114161) @@ -132,7 +132,7 @@ $wgHooks['APIAfterExecute'][] = 'ApiParseExtender::onAPIAfterExecute'; $wgHooks['APIGetParamDescription'][] = 'ApiParseExtender::onAPIGetParamDescription'; $wgHooks['APIGetDescription'][] = 'ApiParseExtender::onAPIGetDescription'; -$wgHooks['OpenSearchXml'][] = 'ApiQueryExcerpts::onOpenSearchXml'; +$wgHooks['OpenSearchXml'][] = 'ApiQueryExtracts::onOpenSearchXml'; function efMobileFrontend_Setup() { global $wgExtMobileFrontend, $wgHooks; @@ -174,6 +174,6 @@ } /** - * Whether this extension should provide its excerpts to OpenSearchXml extension + * Whether this extension should provide its extracts to OpenSearchXml extension */ $wgMFExtendOpenSearchXml = false; Modified: trunk/extensions/MobileFrontend/api/ApiQueryExtracts.php =================================================================== --- trunk/extensions/MobileFrontend/api/ApiQueryExtracts.php 2012-03-19 18:09:40 UTC (rev 114160) +++ trunk/extensions/MobileFrontend/api/ApiQueryExtracts.php 2012-03-19 18:17:29 UTC (rev 114161) @@ -21,9 +21,10 @@ wfProfileOut( __METHOD__ ); return; } - $isXml = $this->getMain()->getPrinter()->getFormat() == 'XML'; + $isXml = $this->getMain()->isInternalMode() || 
$this->getMain()->getPrinter()->getFormat() == 'XML'; $result = $this->getResult(); $params = $this->params = $this->extractRequestParams(); + $this->requireMaxOneParameter( $params, 'chars', 'sentences' ); $continue = 0; $limit = intval( $params['limit'] ); if ( $limit > 1 && !$params['intro'] ) { @@ -45,9 +46,8 @@ break; } $text = $this->getExtract( $t ); - if ( isset( $params['length'] ) ) { - $text = $this->trimText( $text ); - } + $text = $this->truncate( $text ); + if ( $isXml ) { $fit = $result->addValue( array( 'query', 'pages', $id ), 'extract', array( '*' => $text ) ); } else { @@ -74,7 +74,7 @@ $api = new ApiMain( new FauxRequest( array( 'action' => 'query', - 'prop' => 'excerpts', + 'prop' => 'extracts', 'explaintext' => true, 'exlimit' => count( $results ), 'pageids' => implode( '|', $pageIds ), @@ -83,8 +83,8 @@ $api->execute(); $data = $api->getResultData(); foreach ( $pageIds as $id ) { - if ( isset( $data['query']['pages'][$id]['excerpts'][0] ) ) { - $results[$id]['extract'] = $data['query']['pages'][$id]['extract'][0]; + if ( isset( $data['query']['pages'][$id]['extract']['*'] ) ) { + $results[$id]['extract'] = $data['query']['pages'][$id]['extract']['*']; $results[$id]['extract trimmed'] = false; } } @@ -92,7 +92,7 @@ } /** - * Returns a processed, but not trimmed excerpt + * Returns a processed, but not trimmed extract * @param Title $title * @return string */ @@ -188,10 +188,8 @@ } /** - * Converts page HTML into an excerpt + * Converts page HTML into an extract * @param string $text - * @param Title $title - * @param bool $plainText * @return string */ private function convertText( $text ) { @@ -203,16 +201,22 @@ return trim( $text ); } + private function truncate( $text ) { + if ( $this->params['chars'] ) { + return $this->getFirstChars( $text, $this->params['chars'] ); + } elseif ( $this->params['sentences'] ) { + return $this->getFirstSentences( $text, $this->params['sentences'] ); + } + return $text; + } + /** * * @param string $text * 
@param int $requestedLength - * @param bool $plainText * @return string */ - private function trimText( $text, $requestedLength, $plainText ) { - global $wgUseTidy; - + private function getFirstChars( $text, $requestedLength ) { wfProfileIn( __METHOD__ ); $length = mb_strlen( $text ); if ( $length <= $requestedLength ) { @@ -223,20 +227,69 @@ preg_match( $pattern, $text, $m ); $text = $m[0]; // Fix possibly unclosed tags - if ( $wgUseTidy && !$plainText ) { + $text = $this->tidy( $text ); + $text .= wfMessage( 'ellipsis' )->inContentLanguage()->text(); + wfProfileOut( __METHOD__ ); + return $text; + } + + /** + * + * @param string $text + * @param int $requestedSentenceCount + */ + private function getFirstSentences( $text, $requestedSentenceCount ) { + wfProfileIn( __METHOD__ ); + // Based on code from OpenSearchXml by Brion Vibber + $endchars = array( + '([^\d])\.\s', '\!\s', '\?\s', // regular ASCII + '。', // full-width ideographic full-stop + '．', '！', '？', // double-width roman forms + '｡', // half-width ideographic full stop + ); + + $endgroup = implode( '|', $endchars ); + $end = "(?:$endgroup)"; + $sentence = ".+?$end+"; + $regexp = "/^($sentence){{$requestedSentenceCount}}/u"; + $matches = array(); + if( preg_match( $regexp, $text, $matches ) ) { + return $matches[0]; + } else { + // Just return the first line + $lines = explode( "\n", $text ); + return trim( $lines[0] ); + } + $text = $this->tidy( $text ); + wfProfileOut( __METHOD__ ); + return $text; + } + + /** + * A simple wrapper around tidy + * @param string $text + */ + private function tidy( $text ) { + global $wgUseTidy; + + wfProfileIn( __METHOD__ ); + if ( $wgUseTidy && !$this->params['plaintext'] ) { $text = trim ( MWTidy::tidy( $text ) ); } - $text .= wfMessage( 'ellipsis' )->inContentLanguage()->text(); wfProfileOut( __METHOD__ ); return $text; } public function getAllowedParams() { return array( - 'length' => array( + 'chars' => array( ApiBase::PARAM_TYPE => 'integer', ApiBase::PARAM_MIN => 
1, ), + 'sentences' => array( + ApiBase::PARAM_TYPE => 'integer', + ApiBase::PARAM_MIN => 1, + ), 'limit' => array( ApiBase::PARAM_DFLT => 1, ApiBase::PARAM_TYPE => 'limit', @@ -258,7 +311,8 @@ public function getParamDescription() { return array( - 'length' => 'How many characters to return, actual text returned might be slightly longer.', + 'chars' => 'How many characters to return, actual text returned might be slightly longer.', + 'sentences' => 'How many sentences to return', 'limit' => 'How many extracts to return. ', 'intro' => 'Return only content before the first section', 'plaintext' => 'Return extracts as plaintext instead of limited HTML', @@ -284,7 +338,7 @@ public function getExamples() { return array( - 'api.php?action=query&prop=extracts&exlength=175&titles=Therion' => 'Get a 175-character extract', + 'api.php?action=query&prop=extracts&exchars=175&titles=Therion' => 'Get a 175-character extract', ); } @@ -329,8 +383,8 @@ $text = parent::getText(); if ( $this->plainText ) { $text = html_entity_decode( $text ); - $text = str_replace( "\r", "\n", $text ); - $text = preg_replace( "/\n{3,}/", "\n\n", $text ); + $text = str_replace( "\r", "\n", $text ); // for Windows + $text = preg_replace( "/\n{3,}/", "\n\n", $text ); // normalise newlines $text = preg_replace_callback( "/" . ApiQueryExtracts::SECTION_MARKER_START . '(\d)'. ApiQueryExtracts::SECTION_MARKER_END . "(.*?)$/m", array( $this, 'sectionCallback' ), _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs