Aude has uploaded a new change for review. https://gerrit.wikimedia.org/r/252931
Change subject: Fix uncaught exception in Special:ItemDisambiguation ...................................................................... Fix uncaught exception in Special:ItemDisambiguation The exception happens with some invalid language codes, such as "<omg>" though "omg" is considered "valid". This makes validation of the language code happen before doing search. If the language code is invalid, then show an error message with formatting that this an error. Also, make use of ContentLanguages to decouple the code from the Language class. WikibaseContentLanguages handles stuff like $wgExtraLanguageNames. As well, add validation in TermSearchInteractor and throw an InvalidArgumentException if the code is invalid, instead of hitting this later during handling language fallback in some cases. This also avoids doing unnecessary database requests. If the language code is invalid, then search would find no terms anyway unless there is bad data in the terms table. Bug: T118556 Change-Id: I1c1d73acff9d55d326ac6f65b3be40103ed87e56 --- M lib/includes/Interactors/TermIndexSearchInteractor.php M lib/includes/Interactors/TermSearchInteractor.php M lib/tests/phpunit/Interactors/TermIndexSearchInteractorTest.php M repo/includes/WikibaseRepo.php M repo/includes/specials/SpecialItemDisambiguation.php M repo/tests/phpunit/includes/specials/SpecialItemDisambiguationTest.php 6 files changed, 239 insertions(+), 62 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/31/252931/1 diff --git a/lib/includes/Interactors/TermIndexSearchInteractor.php b/lib/includes/Interactors/TermIndexSearchInteractor.php index 1416a20..612d3b1 100644 --- a/lib/includes/Interactors/TermIndexSearchInteractor.php +++ b/lib/includes/Interactors/TermIndexSearchInteractor.php @@ -2,9 +2,11 @@ namespace Wikibase\Lib\Interactors; +use InvalidArgumentException; use Wikibase\DataModel\Entity\EntityId; use Wikibase\DataModel\Term\Term; use Wikibase\LanguageFallbackChainFactory; +use Wikibase\Lib\ContentLanguages; use Wikibase\Lib\Store\LanguageFallbackLabelDescriptionLookup; use Wikibase\Store\BufferingTermLookup; use Wikibase\TermIndex; @@ -42,6 +44,11 @@ private $labelDescriptionLookup; /** + * @var ContentLanguages + */ + private $contentLanguages; + + /** * @var string languageCode to use for display terms */ private $displayLanguageCode; @@ -70,18 +77,21 @@ * @param TermIndex $termIndex Used to search the terms * @param LanguageFallbackChainFactory $fallbackFactory * @param BufferingTermLookup $bufferingTermLookup Provides the displayTerms + * @param ContentLanguages $contentLanguages * @param string $displayLanguageCode */ public function __construct( TermIndex $termIndex, LanguageFallbackChainFactory $fallbackFactory, BufferingTermLookup $bufferingTermLookup, + ContentLanguages $contentLanguages, $displayLanguageCode ) { Assert::parameterType( 'string', $displayLanguageCode, '$displayLanguageCode' ); $this->termIndex = $termIndex; $this->bufferingTermLookup = $bufferingTermLookup; $this->languageFallbackChainFactory = $fallbackFactory; + $this->contentLanguages = $contentLanguages; $this->displayLanguageCode = $displayLanguageCode; $this->labelDescriptionLookup = new LanguageFallbackLabelDescriptionLookup( $this->bufferingTermLookup, @@ -162,8 +172,13 @@ * @param string[] $termTypes * * @returns TermSearchResult[] + * @throws InvalidArgumentException */ public function searchForEntities( $text, $languageCode, $entityType, array $termTypes ) { + if ( !$this->contentLanguages->hasLanguage( $languageCode ) ) { + throw new InvalidArgumentException( '$languageCode is not valid.' ); + } + $matchedTermIndexEntries = $this->getMatchingTermIndexEntries( $text, $languageCode, $entityType, $termTypes ); $entityIds = $this->getEntityIdsForTermIndexEntries( $matchedTermIndexEntries ); diff --git a/lib/includes/Interactors/TermSearchInteractor.php b/lib/includes/Interactors/TermSearchInteractor.php index 959df49..27b04cf 100644 --- a/lib/includes/Interactors/TermSearchInteractor.php +++ b/lib/includes/Interactors/TermSearchInteractor.php @@ -2,6 +2,8 @@ namespace Wikibase\Lib\Interactors; +use InvalidArgumentException; + /** * Interface for searching for terms * @@ -21,6 +23,7 @@ * @param string[] $termTypes Types of Term to return, array of Wikibase\TermIndexEntry::TYPE_* * * @returns TermSearchResult[] + * @throws InvalidArgumentException */ public function searchForEntities( $text, $languageCode, $entityType, array $termTypes ); diff --git a/lib/tests/phpunit/Interactors/TermIndexSearchInteractorTest.php b/lib/tests/phpunit/Interactors/TermIndexSearchInteractorTest.php index 314bf52..2bd3ab4 100644 --- a/lib/tests/phpunit/Interactors/TermIndexSearchInteractorTest.php +++ b/lib/tests/phpunit/Interactors/TermIndexSearchInteractorTest.php @@ -11,6 +11,7 @@ use Wikibase\LanguageFallbackChainFactory; use Wikibase\Lib\Interactors\TermIndexSearchInteractor; use Wikibase\Lib\Interactors\TermSearchResult; +use Wikibase\Lib\WikibaseContentLanguages; use Wikibase\Store\BufferingTermLookup; use Wikibase\TermIndexEntry; use Wikibase\Test\MockTermIndex; @@ -151,6 +152,25 @@ return $mockFallbackChain; } + private function getContentLanguages() { + $languageCodes = array( 'br', 'de', 'en', 'en-ca', 'en-gb', 'fr' ); + + $contentLanguages = $this->getMock( 'Wikibase\Lib\ContentLanguages' ); + $contentLanguages->expects( $this->any() ) + ->method( 'getLanguages' ) + ->will( $this->returnCallback( function() use( $languageCodes ) { + return $languageCodes; + } ) ); + + $contentLanguages->expects( $this->any() ) + ->method( 'hasLanguage' ) + ->will( $this->returnCallback( function( $languageCode ) use ( $languageCodes ) { + return in_array( $languageCode, $languageCodes ); + } ) ); + + return $contentLanguages; + } + /** * @param bool $caseSensitive * @param bool $prefixSearch @@ -169,6 +189,7 @@ $this->getMockTermIndex(), $this->getMockLanguageFallbackChainFactory(), $this->getMockBufferingTermLookup(), + $this->getContentLanguages(), 'pt' ); if ( $caseSensitive !== null ) { @@ -351,6 +372,18 @@ ); } + public function testSearchForEntities_withInvalidLanguageCode() { + $interactor = $this->newTermSearchInteractor(); + + $this->setExpectedException( 'InvalidArgumentException' ); + $interactor->searchForEntities( + 'kittens!!!!', + '<omg-invalid-lang>', + 'item', + array( TermIndexEntry::TYPE_LABEL ) + ); + } + /** * @dataProvider provideLimitInputAndExpected */ diff --git a/repo/includes/WikibaseRepo.php b/repo/includes/WikibaseRepo.php index 38f4571..0dff629 100644 --- a/repo/includes/WikibaseRepo.php +++ b/repo/includes/WikibaseRepo.php @@ -499,6 +499,7 @@ $this->getStore()->getTermIndex(), $this->getLanguageFallbackChainFactory(), $this->getBufferingTermLookup(), + $this->getTermsLanguages(), $displayLanguageCode ); } diff --git a/repo/includes/specials/SpecialItemDisambiguation.php b/repo/includes/specials/SpecialItemDisambiguation.php index a52e03f..6f53374 100644 --- a/repo/includes/specials/SpecialItemDisambiguation.php +++ b/repo/includes/specials/SpecialItemDisambiguation.php @@ -4,11 +4,13 @@ use HTMLForm; use Html; -use Language; +use WebRequest; use Wikibase\ItemDisambiguation; +use Wikibase\Lib\ContentLanguages; use Wikibase\Lib\LanguageNameLookup; use Wikibase\Lib\Interactors\TermIndexSearchInteractor; use Wikibase\Lib\Interactors\TermSearchResult; +use Wikibase\Lib\WikibaseContentLanguages; use Wikibase\Repo\WikibaseRepo; use Wikibase\TermIndexEntry; @@ -23,7 +25,7 @@ * @author Daniel Kinzler * @author Adam Shorland */ -class SpecialItemDisambiguation extends SpecialWikibasePage { +class SpecialItemDisambiguation extends SpecialWikibaseRepoPage { /** * @var ItemDisambiguation DO NOT ACCESS DIRECTLY use this->getItemDisambiguation @@ -34,6 +36,11 @@ * @var TermIndexSearchInteractor|null DO NOT ACCESS DIRECTLY use this->getSearchInteractor */ private $searchInteractor = null; + + /** + * @var ContentLanguages + */ + private $contentLanguages; /** * @var int @@ -47,7 +54,11 @@ */ public function __construct() { parent::__construct( 'ItemDisambiguation', '', true ); - //@todo: make this configurable + + // @todo inject this + $this->contentLanguages = new WikibaseContentLanguages(); + + // @todo make this configurable $this->limit = 100; } @@ -57,13 +68,16 @@ * * @param ItemDisambiguation $itemDisambiguation * @param TermIndexSearchInteractor|null $searchInteractor + * @param ContentLanguages $contentLanguages */ public function initServices( ItemDisambiguation $itemDisambiguation, - TermIndexSearchInteractor $searchInteractor + TermIndexSearchInteractor $searchInteractor, + ContentLanguages $contentLanguages ) { $this->itemDisambiguation = $itemDisambiguation; $this->searchInteractor = $searchInteractor; + $this->contentLanguages = $contentLanguages; } /** @@ -73,8 +87,9 @@ */ private function getSearchInteractor( $displayLanguageCode ) { if ( $this->searchInteractor === null ) { - $interactor = WikibaseRepo::getDefaultInstance()->newTermSearchInteractor( $displayLanguageCode ); - $this->searchInteractor = $interactor; + $this->searchInteractor = WikibaseRepo::getDefaultInstance()->newTermSearchInteractor( + $displayLanguageCode + ); } return $this->searchInteractor; } @@ -104,44 +119,96 @@ public function execute( $subPage ) { parent::execute( $subPage ); - // Setup $request = $this->getRequest(); - $parts = $subPage === '' ? array() : explode( '/', $subPage, 2 ); - $languageCode = $request->getVal( 'language', isset( $parts[0] ) ? $parts[0] : '' ); - if ( $languageCode === '' ) { - $languageCode = $this->getLanguage()->getCode(); - } + $subPageParts = $subPage === '' ? array() : explode( '/', $subPage, 2 ); - if ( $request->getCheck( 'label' ) ) { - $label = $request->getText( 'label' ); - } else { - $label = isset( $parts[1] ) ? str_replace( '_', ' ', $parts[1] ) : ''; - } + $languageCode = $this->extractLanguageCode( $request, $subPageParts ); + $label = $this->extractLabel( $request, $subPageParts ); $this->switchForm( $languageCode, $label ); // Display the result set if ( isset( $languageCode ) && isset( $label ) && $label !== '' ) { - $searchInteractor = $this->getSearchInteractor( $this->getLanguage()->getCode() ); - $searchInteractor->setLimit( $this->limit ); - $searchInteractor->setIsCaseSensitive( false ); - $searchInteractor->setIsPrefixSearch( false ); - $searchInteractor->setUseLanguageFallback( true ); - - $searchResults = $searchInteractor->searchForEntities( - $label, - $languageCode, - 'item', - array( TermIndexEntry::TYPE_LABEL, TermIndexEntry::TYPE_ALIAS ) - ); - - if ( 0 < count( $searchResults ) ) { - $this->getOutput()->setPageTitle( $this->msg( 'wikibase-disambiguation-title', $label )->escaped() ); - $this->displayDisambiguationPage( $searchResults ); + if ( !$this->contentLanguages->hasLanguage( $languageCode ) ) { + $this->showErrorHTML( + $this->msg( 'wikibase-itemdisambiguation-invalid-langcode' )->escaped() + ); } else { - $this->showNothingFound( $languageCode, $label ); + $searchResults = $this->getSearchResults( $label, $languageCode ); + + if ( !empty( $searchResults ) ) { + $this->displaySearchResults( $searchResults, $label ); + } else { + $this->showNothingFound( $languageCode, $label ); + } } } + } + + /** + * @param WebRequest $request + * @param array $subPageParts + * + * @return string + */ + private function extractLanguageCode( WebRequest $request, array $subPageParts ) { + $languageCode = $request->getVal( + 'language', + isset( $subPageParts[0] ) ? $subPageParts[0] : '' + ); + + if ( $languageCode === '' ) { + $languageCode = $this->getLanguage()->getCode(); + } + + return $languageCode; + } + + /** + * @param WebRequest $request + * @param array $subPageParts + * + * @return string + */ + private function extractLabel( WebRequest $request, array $subPageParts ) { + if ( $request->getCheck( 'label' ) ) { + return $request->getText( 'label' ); + } + + return isset( $subPageParts[1] ) ? str_replace( '_', ' ', $subPageParts[1] ) : ''; + } + + /** + * @param TermSearchResult[] $searchResults + * @param string $label + */ + private function displaySearchResults( array $searchResults, $label ) { + $this->getOutput()->setPageTitle( + $this->msg( 'wikibase-disambiguation-title', $label )->escaped() + ); + + $this->displayDisambiguationPage( $searchResults ); + } + + /** + * @param string $label + * @param string $languageCode + * + * @return TermSearchResult[] + */ + private function getSearchResults( $label, $languageCode ) { + $searchInteractor = $this->getSearchInteractor( $this->getLanguage()->getCode() ); + $searchInteractor->setLimit( $this->limit ); + $searchInteractor->setIsCaseSensitive( false ); + $searchInteractor->setIsPrefixSearch( false ); + $searchInteractor->setUseLanguageFallback( true ); + + return $searchInteractor->searchForEntities( + $label, + $languageCode, + 'item', + array( TermIndexEntry::TYPE_LABEL, TermIndexEntry::TYPE_ALIAS ) + ); } /** @@ -151,25 +218,19 @@ * @param string $label */ private function showNothingFound( $languageCode, $label ) { - // No results found - if ( ( Language::isValidBuiltInCode( $languageCode ) && ( Language::fetchLanguageName( $languageCode ) !== "" ) ) ) { - $this->getOutput()->addWikiMsg( 'wikibase-itemdisambiguation-nothing-found' ); + $this->getOutput()->addWikiMsg( 'wikibase-itemdisambiguation-nothing-found' ); - if ( $languageCode === $this->getLanguage()->getCode() ) { - $searchLink = $this->getTitleFor( 'Search' ); - $this->getOutput()->addWikiMsg( - 'wikibase-itemdisambiguation-search', - $searchLink->getFullURL( array( 'search' => $label ) ) - ); - $createLink = $this->getTitleFor( 'NewItem' ); - $this->getOutput()->addWikiMsg( - 'wikibase-itemdisambiguation-create', - $createLink->getFullURL( array( 'label' => $label ) ) - ); - } - } else { - // No valid language code - $this->getOutput()->addWikiMsg( 'wikibase-itemdisambiguation-invalid-langcode' ); + if ( $languageCode === $this->getLanguage()->getCode() ) { + $searchLink = $this->getTitleFor( 'Search' ); + $this->getOutput()->addWikiMsg( + 'wikibase-itemdisambiguation-search', + $searchLink->getFullURL( array( 'search' => $label ) ) + ); + $createLink = $this->getTitleFor( 'NewItem' ); + $this->getOutput()->addWikiMsg( + 'wikibase-itemdisambiguation-create', + $createLink->getFullURL( array( 'label' => $label ) ) + ); } } @@ -226,7 +287,9 @@ ->setFooterText( Html::element( 'p', array(), - $this->msg( 'wikibase-itemdisambiguation-form-hints' )->numParams( $this->limit )->text() + $this->msg( 'wikibase-itemdisambiguation-form-hints' )->numParams( + $this->limit + )->text() ) ) ->setWrapperLegendMsg( 'wikibase-itemdisambiguation-lookup-fieldset' ) ->suppressDefaultSubmit() diff --git a/repo/tests/phpunit/includes/specials/SpecialItemDisambiguationTest.php b/repo/tests/phpunit/includes/specials/SpecialItemDisambiguationTest.php index 5d7fba9..d0aa41b 100644 --- a/repo/tests/phpunit/includes/specials/SpecialItemDisambiguationTest.php +++ b/repo/tests/phpunit/includes/specials/SpecialItemDisambiguationTest.php @@ -3,6 +3,7 @@ namespace Wikibase\Test; use FauxRequest; +use InvalidArgumentException; use Wikibase\DataModel\Entity\ItemId; use Wikibase\DataModel\Term\Term; use Wikibase\ItemDisambiguation; @@ -47,7 +48,7 @@ * @return TermIndexSearchInteractor */ private function getMockSearchInteractor() { - $returnResults = array( + $searchResults = array( array( 'entityId' => new ItemId( 'Q2' ), 'matchedTermType' => 'label', @@ -68,15 +69,31 @@ $mock = $this->getMockBuilder( 'Wikibase\Lib\Interactors\TermIndexSearchInteractor' ) ->disableOriginalConstructor() ->getMock(); + $mock->expects( $this->any() ) ->method( 'searchForEntities' ) - ->with( - $this->equalTo( 'Foo' ), - $this->equalTo( 'fr' ), - $this->equalTo( 'item' ), - $this->equalTo( array( TermIndexEntry::TYPE_LABEL, TermIndexEntry::TYPE_ALIAS ) ) - ) - ->will( $this->returnValue( $returnResults ) ); + ->will( $this->returnCallback( + function( $text, $lang, $entityType, array $termTypes ) use ( $searchResults ) { + if ( $lang !== 'fr' ) { + throw new InvalidArgumentException( 'Not a valid language code' ); + } + + $expectedTermTypes = array( + TermIndexEntry::TYPE_LABEL, + TermIndexEntry::TYPE_ALIAS + ); + + if ( + $text === 'Foo' && + $entityType === 'item' && + $termTypes === $expectedTermTypes + ) { + return $searchResults; + } + + return array(); + } + ) ); $mock->expects( $this->any() ) ->method( 'setIsCaseSensitive' ) @@ -93,11 +110,31 @@ return $mock; } + private function getContentLanguages() { + $languageCodes = array( 'ar', 'de', 'en', 'fr' ); + + $contentLanguages = $this->getMock( 'Wikibase\Lib\ContentLanguages' ); + $contentLanguages->expects( $this->any() ) + ->method( 'getLanguages' ) + ->will( $this->returnCallback( function() use( $languageCodes ) { + return $languageCodes; + } ) ); + + $contentLanguages->expects( $this->any() ) + ->method( 'hasLanguage' ) + ->will( $this->returnCallback( function( $languageCode ) use ( $languageCodes ) { + return in_array( $languageCode, $languageCodes ); + } ) ); + + return $contentLanguages; + } + protected function newSpecialPage() { $page = new SpecialItemDisambiguation(); $page->initServices( $this->getMockItemDisambiguation(), - $this->getMockSearchInteractor() + $this->getMockSearchInteractor(), + $this->getContentLanguages() ); return $page; } @@ -153,4 +190,29 @@ } } + /** + * @dataProvider execute_withInvalidLanguageCodeProvider + */ + public function testExecute_withInvalidLanguageCode( $userLanguageCode, $searchLanguageCode ) { + $data = array( + 'language' => $searchLanguageCode, + 'label' => 'Foo' + ); + + list( $output, ) = $this->executeSpecialPage( + '', + new FauxRequest( $data ), + $userLanguageCode + ); + + $this->assertContains( 'wikibase-itemdisambiguation-invalid-langcode', $output ); + } + + public function execute_withInvalidLanguageCodeProvider() { + return array( + array( 'qqx', '<omg invalid language>' ), + array( 'qqx', 'ooooooooo' ) + ); + } + } -- To view, visit https://gerrit.wikimedia.org/r/252931 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I1c1d73acff9d55d326ac6f65b3be40103ed87e56 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Aude <aude.w...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits