jenkins-bot has submitted this change and it was merged. Change subject: Implement TermSearchInteractor ......................................................................
Implement TermSearchInteractor Also adds a convenience method to WikibaseRepo Bug: T90692 Change-Id: I06714ad2d793e7927a2eb07715e8853a2c170715 --- M lib/tests/phpunit/store/MockTermIndex.php A repo/includes/Interactors/TermIndexSearchInteractor.php A repo/includes/Interactors/TermSearchInteractor.php M repo/includes/WikibaseRepo.php A repo/tests/phpunit/includes/Interactors/TermIndexSearchInteractorTest.php 5 files changed, 850 insertions(+), 2 deletions(-) Approvals: Daniel Kinzler: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/tests/phpunit/store/MockTermIndex.php b/lib/tests/phpunit/store/MockTermIndex.php index 7f4bd68..45e294e 100644 --- a/lib/tests/phpunit/store/MockTermIndex.php +++ b/lib/tests/phpunit/store/MockTermIndex.php @@ -319,6 +319,10 @@ } /** + * Returns the same as getMatchingTerms simply making sure only one term + * is returned per EntityId. This is the first term. + * Weighting does not affect the order of return by this method. + * * @param TermIndexEntry[] $terms * @param string|string[]|null $termType * @param string|string[]|null $entityType @@ -332,8 +336,17 @@ $entityType = null, array $options = array() ) { - throw new BadMethodCallException( __METHOD__ . ' not implemented' ); - // TODO: Implement getHighestRankMatchingTerms() method. 
+ $options['orderByWeight'] = true; + $terms = $this->getMatchingTerms( $terms, $termType, $entityType, $options ); + $previousEntityIdSerializations = array(); + $returnTerms = array(); + foreach( $terms as $termIndexEntry ) { + if( !in_array( $termIndexEntry->getEntityId()->getSerialization(), $previousEntityIdSerializations ) ) { + $returnTerms[] = $termIndexEntry; + $previousEntityIdSerializations[] = $termIndexEntry->getEntityId()->getSerialization(); + } + } + return $returnTerms; } /** diff --git a/repo/includes/Interactors/TermIndexSearchInteractor.php b/repo/includes/Interactors/TermIndexSearchInteractor.php new file mode 100644 index 0000000..74012fe --- /dev/null +++ b/repo/includes/Interactors/TermIndexSearchInteractor.php @@ -0,0 +1,385 @@ +<?php + +namespace Wikibase\Repo\Interactors; + +use OutOfBoundsException; +use Wikibase\DataModel\Entity\EntityId; +use Wikibase\DataModel\Term\Term; +use Wikibase\LanguageFallbackChainFactory; +use Wikibase\Lib\Store\LanguageFallbackLabelDescriptionLookup; +use Wikibase\Store\BufferingTermLookup; +use Wikibase\TermIndex; +use Wikibase\TermIndexEntry; +use Wikimedia\Assert\Assert; + +/** + * @since 0.5 + * + * @licence GNU GPL v2+ + * @author Adam Shorland + */ +class TermIndexSearchInteractor implements TermSearchInteractor { + + /** + * @var TermIndex + */ + private $termIndex; + + /** + * @var LanguageFallbackChainFactory + */ + private $languageFallbackChainFactory; + + /** + * @var BufferingTermLookup + */ + private $bufferingTermLookup; + + /** + * @var LanguageFallbackLabelDescriptionLookup + */ + private $labelDescriptionLookup; + + /** + * @var string languageCode to use for display terms + */ + private $displayLanguageCode; + + /** + * @var bool do a case sensitive search + */ + private $isCaseSensitive = false; + + /** + * @var bool do a prefix search + */ + private $isPrefixSearch = false; + + /** + * @var bool use language fallback in the search + */ + private $useLanguageFallback = true; + + /** + * 
@var int + */ + private $limit = 5000; + + /** + * @param TermIndex $termIndex Used to search the terms + * @param LanguageFallbackChainFactory $fallbackFactory + * @param BufferingTermLookup $bufferingTermLookup Provides the displayTerms + * @param string $displayLanguageCode + */ + public function __construct( + TermIndex $termIndex, + LanguageFallbackChainFactory $fallbackFactory, + BufferingTermLookup $bufferingTermLookup, + $displayLanguageCode + ) { + Assert::parameterType( 'string', $displayLanguageCode, '$displayLanguageCode' ); + $this->termIndex = $termIndex; + $this->bufferingTermLookup = $bufferingTermLookup; + $this->languageFallbackChainFactory = $fallbackFactory; + $this->displayLanguageCode = $displayLanguageCode; + $this->labelDescriptionLookup = new LanguageFallbackLabelDescriptionLookup( + $this->bufferingTermLookup, + $this->languageFallbackChainFactory->newFromLanguageCode( $this->displayLanguageCode ) + ); + } + + /** + * @return int + */ + public function getLimit() { + return $this->limit; + } + + /** + * @return bool + */ + public function getIsCaseSensitive() { + return $this->isCaseSensitive; + } + + /** + * @return bool + */ + public function getIsPrefixSearch() { + return $this->isPrefixSearch; + } + + /** + * @return bool + */ + public function getUseLanguageFallback() { + return $this->useLanguageFallback; + } + + /** + * @param int $limit Hard upper limit of 5000 + */ + public function setLimit( $limit ) { + Assert::parameterType( 'integer', $limit, '$limit' ); + Assert::parameter( $limit > 0, '$limit', 'Must be positive' ); + if ( $limit > 5000 ) { + $limit = 5000; + } + $this->limit = $limit; + } + + /** + * @param bool $caseSensitive + */ + public function setIsCaseSensitive( $caseSensitive ) { + Assert::parameterType( 'boolean', $caseSensitive, '$caseSensitive' ); + $this->isCaseSensitive = $caseSensitive; + } + + /** + * @param bool $prefixSearch + */ + public function setIsPrefixSearch( $prefixSearch ) { + 
Assert::parameterType( 'boolean', $prefixSearch, '$prefixSearch' ); + $this->isPrefixSearch = $prefixSearch; + } + + /** + * @param bool $useLanguageFallback + */ + public function setUseLanguageFallback( $useLanguageFallback ) { + Assert::parameterType( 'boolean', $useLanguageFallback, '$useLanguageFallback' ); + $this->useLanguageFallback = $useLanguageFallback; + } + + /** + * @see TermSearchInteractor::searchForEntities + * + * @param string $text + * @param string $languageCode + * @param string $entityType + * @param string[] $termTypes + * + * @returns array[] + */ + public function searchForEntities( $text, $languageCode, $entityType, array $termTypes ) { + $matchedTermIndexEntries = $this->getMatchingTermIndexEntries( $text, $languageCode, $entityType, $termTypes ); + $entityIds = $this->getEntityIdsForTermIndexEntries( $matchedTermIndexEntries ); + + $this->preFetchLabelsAndDescriptionsForDisplay( $entityIds ); + return $this->getSearchResults( $matchedTermIndexEntries ); + } + + /** + * @param string $text + * @param string $languageCode + * @param string $entityType + * @param string[] $termTypes + * + * @return TermIndexEntry[] + */ + private function getMatchingTermIndexEntries( $text, $languageCode, $entityType, array $termTypes ) { + $languageCodes = array( $languageCode ); + $matchedTermIndexEntries = $this->termIndex->getTopMatchingTerms( + $this->makeTermIndexEntryTemplates( + $text, + $languageCodes, + $termTypes + ), + null, + $entityType, + $this->getTermIndexOptions() + ); + // Shortcut out if we already have enough TermIndexEntries + if( count( $matchedTermIndexEntries ) == $this->limit || !$this->useLanguageFallback ) { + return $matchedTermIndexEntries; + } + + $matchedEntityIdSerializations = array(); + foreach( $matchedTermIndexEntries as $termIndexEntry ) { + $matchedEntityIdSerializations[] = $termIndexEntry->getEntityId()->getSerialization(); + } + + if( $this->useLanguageFallback ) { + $fallbackMatchedTermIndexEntries = 
$this->termIndex->getTopMatchingTerms( + $this->makeTermIndexEntryTemplates( + $text, + $this->addFallbackLanguageCodes( $languageCodes ), + $termTypes + ), + null, + $entityType, + $this->getTermIndexOptions() + ); + + // Remove any IndexEntries that we already have a match for + foreach( $fallbackMatchedTermIndexEntries as $key => $termIndexEntry ) { + if( in_array( $termIndexEntry->getEntityId()->getSerialization(), $matchedEntityIdSerializations ) ) { + unset( $fallbackMatchedTermIndexEntries[$key] ); + } + } + + // Matches in the main language will always be first + $matchedTermIndexEntries = array_merge( $matchedTermIndexEntries, $fallbackMatchedTermIndexEntries ); + if( count( $matchedTermIndexEntries ) > $this->limit ) { + $matchedTermIndexEntries = array_slice( $matchedTermIndexEntries, 0, $this->limit, true ); + } + } + + return $matchedTermIndexEntries; + } + + /** + * @param TermIndexEntry[] $termIndexEntries + * + * @returns array[] + * @see TermSearchInteractor interface for return format + */ + private function getSearchResults( array $termIndexEntries ) { + $searchResults = array(); + foreach ( $termIndexEntries as $termIndexEntry ) { + $searchResults[] = $this->convertToSearchResult( $termIndexEntry ); + } + return array_values( $searchResults ); + } + + /** + * @param EntityId[] $entityIds + */ + private function preFetchLabelsAndDescriptionsForDisplay( array $entityIds ) { + $this->bufferingTermLookup->prefetchTerms( + $entityIds, + array( TermIndexEntry::TYPE_LABEL, TermIndexEntry::TYPE_DESCRIPTION ), + $this->addFallbackLanguageCodes( array( $this->displayLanguageCode ) ) + ); + } + + /** + * @param TermIndexEntry[] $termsIndexEntries + * + * @return EntityId[] + */ + private function getEntityIdsForTermIndexEntries( array $termsIndexEntries ) { + $entityIds = array(); + foreach( $termsIndexEntries as $termIndexEntry ) { + $entityId = $termIndexEntry->getEntityId(); + // We would hope that this would never happen, but is possible + if ( $entityId !== null ) { + // Use 
a key so that the array will end up being full of unique IDs + $entityIds[$entityId->getSerialization()] = $entityId; + } + } + return $entityIds; + } + + /** + * @param TermIndexEntry $termIndexEntry + * + * @returns array + * @see TermSearchInteractor interface for return format + */ + private function convertToSearchResult( TermIndexEntry $termIndexEntry ) { + $entityId = $termIndexEntry->getEntityId(); + return array( + TermSearchInteractor::ENTITYID_KEY => $entityId, + TermSearchInteractor::MATCHEDTERM_KEY => $termIndexEntry->getTerm(), + TermSearchInteractor::MATCHEDTERMTYPE_KEY => $termIndexEntry->getType(), + TermSearchInteractor::DISPLAYTERMS_KEY => $this->getDisplayTerms( $entityId ), + ); + } + + private function getTermIndexOptions() { + return array( + 'caseSensitive' => $this->isCaseSensitive, + 'prefixSearch' => $this->isPrefixSearch, + 'LIMIT' => $this->limit, + ); + } + + /** + * @param array $languageCodes + * + * @return array + */ + private function addFallbackLanguageCodes( array $languageCodes ) { + $languageCodesWithFallback = array(); + foreach ( $languageCodes as $languageCode ) { + $fallbackChain = $this->languageFallbackChainFactory->newFromLanguageCode( $languageCode ); + $languageCodesWithFallback = array_merge( + $languageCodesWithFallback, + $fallbackChain->getFetchLanguageCodes() + ); + } + + return array_unique( $languageCodesWithFallback ); + } + + /** + * @param EntityId $entityId + * + * @return Term[] array with possible keys TermIndexEntry::TYPE_* + */ + private function getDisplayTerms( EntityId $entityId ) { + $displayTerms = array(); + + $labelDisplayTerm = $this->getLabelDisplayTerm( $entityId ); + if( $labelDisplayTerm !== null ) { + $displayTerms[TermIndexEntry::TYPE_LABEL] = $labelDisplayTerm; + } + + $descriptionDisplayTerm = $this->getDescriptionDisplayTerm( $entityId ); + if( $descriptionDisplayTerm !== null ) { + $displayTerms[TermIndexEntry::TYPE_DESCRIPTION] = $descriptionDisplayTerm; + } + + return $displayTerms; 
+ } + + /** + * @param EntityId $entityId + * + * @return null|Term + */ + private function getLabelDisplayTerm( EntityId $entityId ) { + try{ + return $this->labelDescriptionLookup->getLabel( $entityId ); + } catch( OutOfBoundsException $e ) { + return null; + } + } + + /** + * @param EntityId $entityId + * + * @return null|Term + */ + private function getDescriptionDisplayTerm( EntityId $entityId ) { + try{ + return $this->labelDescriptionLookup->getDescription( $entityId ); + } catch( OutOfBoundsException $e ) { + return null; + } + } + + /** + * @param string $text + * @param string[] $languageCodes + * @param string[] $termTypes + * + * @returns TermIndexEntry[] + */ + private function makeTermIndexEntryTemplates( $text, $languageCodes, $termTypes ) { + $terms = array(); + foreach ( $languageCodes as $languageCode ) { + foreach ( $termTypes as $termType ) { + $terms[] = new TermIndexEntry( array( + 'termText' => $text, + 'termLanguage' => $languageCode, + 'termType' => $termType, + ) ); + } + } + return $terms; + } + +} diff --git a/repo/includes/Interactors/TermSearchInteractor.php b/repo/includes/Interactors/TermSearchInteractor.php new file mode 100644 index 0000000..5de51c7 --- /dev/null +++ b/repo/includes/Interactors/TermSearchInteractor.php @@ -0,0 +1,41 @@ +<?php + +namespace Wikibase\Repo\Interactors; + +/** + * Interface for searching for terms + * + * @since 0.5 + * + * @licence GNU GPL v2+ + * @author Adam Shorland + */ +interface TermSearchInteractor { + + /** + * Keys used in the method return array + */ + const ENTITYID_KEY = 'entityId'; + const MATCHEDTERM_KEY = 'matchedTerm'; + const MATCHEDTERMTYPE_KEY = 'matchedTermType'; + const DISPLAYTERMS_KEY = 'displayTerms'; + + /** + * @since 0.5 + * + * @param string $text Term text to search for + * @param string $languageCode Language code to search in + * @param string $entityType Type of Entity to return + * @param string[] $termTypes Types of Term to return, array of 
Wikibase\TermIndexEntry::TYPE_* + * + * @returns array[] array of arrays containing the following: + * [ENTITYID_KEY] => EntityId EntityId object + * [MATCHEDTERM_KEY] => Term matched Term object + * [MATCHEDTERMTYPE_KEY] => string one of Wikibase\TermIndexEntry::TYPE_* + * [DISPLAYTERMS_KEY] => array array with possible keys Wikibase\TermIndexEntry::TYPE_* + * Wikibase\TermIndexEntry::TYPE_LABEL => Term + * Wikibase\TermIndexEntry::TYPE_DESCRIPTION => Term + */ + public function searchForEntities( $text, $languageCode, $entityType, array $termTypes ); + +} diff --git a/repo/includes/WikibaseRepo.php b/repo/includes/WikibaseRepo.php index d1cc47e..54e6203 100644 --- a/repo/includes/WikibaseRepo.php +++ b/repo/includes/WikibaseRepo.php @@ -71,6 +71,7 @@ use Wikibase\Repo\Content\PropertyHandler; use Wikibase\Repo\Hooks\EditFilterHookRunner; use Wikibase\Repo\Interactors\RedirectCreationInteractor; +use Wikibase\Repo\Interactors\TermIndexSearchInteractor; use Wikibase\Repo\LinkedData\EntityDataFormatProvider; use Wikibase\Repo\Localizer\ChangeOpValidationExceptionLocalizer; use Wikibase\Repo\Localizer\MessageParameterFormatter; @@ -342,6 +343,22 @@ /** * @since 0.5 * + * @param string $displayLanguageCode + * + * @return TermIndexSearchInteractor + */ + public function newTermSearchInteractor( $displayLanguageCode ) { + return new TermIndexSearchInteractor( + $this->getStore()->getTermIndex(), + $this->getLanguageFallbackChainFactory(), + $this->getTermLookup(), + $displayLanguageCode + ); + } + + /** + * @since 0.5 + * * @return EntityStore */ public function getEntityStore() { diff --git a/repo/tests/phpunit/includes/Interactors/TermIndexSearchInteractorTest.php b/repo/tests/phpunit/includes/Interactors/TermIndexSearchInteractorTest.php new file mode 100644 index 0000000..f6f6360 --- /dev/null +++ b/repo/tests/phpunit/includes/Interactors/TermIndexSearchInteractorTest.php @@ -0,0 +1,392 @@ +<?php + +namespace Wikibase\Test\Interactors; + +use 
PHPUnit_Framework_TestCase; +use Wikibase\DataModel\Entity\EntityId; +use Wikibase\DataModel\Entity\ItemId; +use Wikibase\DataModel\Entity\PropertyId; +use Wikibase\DataModel\Term\Term; +use Wikibase\DataModel\Term\TermFallback; +use Wikibase\LanguageFallbackChainFactory; +use Wikibase\Repo\Interactors\TermIndexSearchInteractor; +use Wikibase\Store\BufferingTermLookup; +use Wikibase\TermIndexEntry; +use Wikibase\Test\MockTermIndex; + +/** + * @covers Wikibase\Repo\Interactors\TermIndexSearchInteractor + * + * @group Wikibase + * @group WikibaseRepo + * @group WikibaseInteractor + * + * @licence GNU GPL v2+ + * @author Adam Shorland + */ +class TermIndexSearchInteractorTest extends PHPUnit_Framework_TestCase { + + private function getMockTermIndex() { + return new MockTermIndex( + array( + //Q111 - Has label, description and alias all the same + $this->getTermIndexEntry( 'Foo', 'en', TermIndexEntry::TYPE_LABEL, new ItemId( 'Q111' ) ), + $this->getTermIndexEntry( 'Foo', 'en', TermIndexEntry::TYPE_DESCRIPTION, new ItemId( 'Q111' ) ), + $this->getTermIndexEntry( 'Foo', 'en', TermIndexEntry::TYPE_ALIAS, new ItemId( 'Q111' ) ), + $this->getTermIndexEntry( 'FOO', 'en', TermIndexEntry::TYPE_ALIAS, new ItemId( 'Q111' ) ), + //Q333 + $this->getTermIndexEntry( 'Food is great', 'en', TermIndexEntry::TYPE_LABEL, new ItemId( 'Q333' ) ), + //Q555 + $this->getTermIndexEntry( 'Ta', 'en', TermIndexEntry::TYPE_ALIAS, new ItemId( 'Q555' ) ), + $this->getTermIndexEntry( 'Taa', 'en', TermIndexEntry::TYPE_ALIAS, new ItemId( 'Q555' ) ), + $this->getTermIndexEntry( 'TAAA', 'en-ca', TermIndexEntry::TYPE_ALIAS, new ItemId( 'Q555' ) ), + $this->getTermIndexEntry( 'Taa', 'en-ca', TermIndexEntry::TYPE_ALIAS, new ItemId( 'Q555' ) ), + //P22 + $this->getTermIndexEntry( 'Lama', 'en-ca', TermIndexEntry::TYPE_LABEL, new PropertyId( 'P22' ) ), + $this->getTermIndexEntry( 'La-description', 'en', TermIndexEntry::TYPE_DESCRIPTION, new PropertyId( 'P22' ) ), + //P44 + $this->getTermIndexEntry( 'Lama', 
'en', TermIndexEntry::TYPE_LABEL, new PropertyId( 'P44' ) ), + $this->getTermIndexEntry( 'Lama-de-desc', 'de', TermIndexEntry::TYPE_DESCRIPTION, new PropertyId( 'P44' ) ), + ) + ); + } + + /** + * @param string $text + * @param string $languageCode + * @param string $termType + * @param EntityId|ItemId|PropertyId $entityId + * + * @returns TermIndexEntry + */ + private function getTermIndexEntry( $text, $languageCode, $termType, EntityId $entityId ) { + return new TermIndexEntry( array( + 'termText' => $text, + 'termLanguage' => $languageCode, + 'termType' => $termType, + 'entityId' => $entityId->getNumericId(), + 'entityType' => $entityId->getEntityType(), + ) ); + } + + /** + * Get a lookup that always returns a pt label and description suffixed by the entity ID + * + * @return BufferingTermLookup + */ + private function getMockBufferingTermLookup() { + $mock = $this->getMockBuilder( 'Wikibase\Store\BufferingTermLookup' ) + ->disableOriginalConstructor() + ->getMock(); + $mock->expects( $this->any() ) + ->method( 'prefetchTerms' ); + $mock->expects( $this->any() ) + ->method( 'getLabels' ) + ->will( $this->returnCallback( function( EntityId $entityId, $languageCodes ) { + $labels = array(); + foreach ( $languageCodes as $languageCode ) { + $labels[$languageCode] = 'label-' . $languageCode . '-' . $entityId->getSerialization(); + } + return $labels; + } + ) ); + $mock->expects( $this->any() ) + ->method( 'getDescriptions' ) + ->will( $this->returnCallback( function( EntityId $entityId, $languageCodes ) { + $descriptions = array(); + foreach ( $languageCodes as $languageCode ) { + $descriptions[$languageCode] = 'description-' . $languageCode . '-' . $entityId->getSerialization(); + } + return $descriptions; + } + ) ); + return $mock; + } + + private function getDisplayTerm( EntityId $entityId, $termType ) { + return new TermFallback( 'pt', $termType . '-pt-' . 
$entityId->getSerialization(), 'pt', 'pt' ); + } + + /** + * @return LanguageFallbackChainFactory + */ + private function getMockLanguageFallbackChainFactory() { + $testCase = $this; + $mockFactory = $this->getMockBuilder( 'Wikibase\LanguageFallbackChainFactory' ) + ->disableOriginalConstructor() + ->getMock(); + $mockFactory->expects( $this->any() ) + ->method( 'newFromLanguageCode' ) + ->will( $this->returnCallback( function( $langCode ) use ( $testCase ) { + return $testCase->getMockLanguageFallbackChainFromLanguage( $langCode ); + } ) ); + return $mockFactory; + } + + public function getMockLanguageFallbackChainFromLanguage( $langCode ) { + $mockFallbackChain = $this->getMockBuilder( 'Wikibase\LanguageFallbackChain' ) + ->disableOriginalConstructor() + ->getMock(); + $mockFallbackChain->expects( $this->any() ) + ->method( 'getFetchLanguageCodes' ) + ->will( $this->returnCallback( function () use( $langCode ) { + if ( $langCode === 'en-gb' || $langCode === 'en-ca' ) { + return array( $langCode, 'en' ); + } + return array( $langCode ); // no fallback for everything else... 
+ } ) ); + $mockFallbackChain->expects( $this->any() ) + ->method( 'extractPreferredValue' ) + ->will( $this->returnCallback( function( $data ) { + foreach ( $data as $languageCode => $value ) { + return array( + 'value' => $value, + 'language' => $languageCode, + 'source' => $languageCode, + ); + } + return null; + } ) ); + return $mockFallbackChain; + } + + /** + * @param bool $caseSensitive + * @param bool $prefixSearch + * @param int $limit + * @param bool $useFallback + * + * @return TermIndexSearchInteractor + */ + private function newTermSearchInteractor( + $caseSensitive = null, + $prefixSearch = null, + $limit = null, + $useFallback = null + ) { + $interactor = new TermIndexSearchInteractor( + $this->getMockTermIndex(), + $this->getMockLanguageFallbackChainFactory(), + $this->getMockBufferingTermLookup(), + 'pt' + ); + if ( $caseSensitive !== null ) { + $interactor->setIsCaseSensitive( $caseSensitive ); + } + if ( $prefixSearch !== null ) { + $interactor->setIsPrefixSearch( $prefixSearch ); + } + if ( $limit !== null ) { + $interactor->setLimit( $limit ); + } + if ( $useFallback !== null ) { + $interactor->setUseLanguageFallback( $useFallback ); + } + return $interactor; + } + + public function provideSearchForEntitiesTest() { + $allTermTypes = array( + TermIndexEntry::TYPE_LABEL, + TermIndexEntry::TYPE_DESCRIPTION, + TermIndexEntry::TYPE_ALIAS + ); + return array( + 'No Results' => array( + $this->newTermSearchInteractor( false, false, 5000 ), + array( 'ABCDEFGHI123', 'br', 'item', $allTermTypes ), + array(), + ), + 'Q111 Foo en Label match exactly' => array( + $this->newTermSearchInteractor( false, false, 5000 ), + array( 'Foo', 'en', 'item', array( TermIndexEntry::TYPE_LABEL ) ), + array( + array( + 'entityId' => new ItemId( 'Q111' ), + 'term' => new Term( 'en', 'Foo' ), + 'termtype' => 'label', + ), + ), + ), + 'Q111&Q333 Foo en Label match prefix search' => array( + $this->newTermSearchInteractor( false, true, 5000 ), + array( 'Foo', 'en', 'item', 
array( TermIndexEntry::TYPE_LABEL ) ), + array( + array( + 'entityId' => new ItemId( 'Q111' ), + 'term' => new Term( 'en', 'Foo' ), + 'termtype' => 'label', + ), + array( + 'entityId' => new ItemId( 'Q333' ), + 'term' => new Term( 'en', 'Food is great' ), + 'termtype' => 'label', + ), + ), + ), + 'Q111&Q333 Foo en Label match prefix search LIMIT 1' => array( + $this->newTermSearchInteractor( false, true, 1 ), + array( 'Foo', 'en', 'item', array( TermIndexEntry::TYPE_LABEL ) ), + array( + array( + 'entityId' => new ItemId( 'Q111' ), + 'term' => new Term( 'en', 'Foo' ), + 'termtype' => 'label', + ), + ), + ), + 'Q111 Foo en-ca Label fallback to en' => array( + $this->newTermSearchInteractor( false, false, 5000 ), + array( 'Foo', 'en-ca', 'item', array( TermIndexEntry::TYPE_LABEL ) ), + array( + array( + 'entityId' => new ItemId( 'Q111' ), + 'term' => new Term( 'en', 'Foo' ), + 'termtype' => 'label', + ), + ), + ), + 'Q111 Foo en all term types match case insensitive' => array( + $this->newTermSearchInteractor( false, false, 5000 ), + array( 'Foo', 'en', 'item', $allTermTypes ), + array( + array( + 'entityId' => new ItemId( 'Q111' ), + 'term' => new Term( 'en', 'Foo' ), + 'termtype' => 'label', + ), + ), + ), + 'Q111 Foo en aliases match case sensitive' => array( + $this->newTermSearchInteractor( true, false, 5000 ), + array( 'Foo', 'en', 'item', $allTermTypes ), + array( + array( + 'entityId' => new ItemId( 'Q111' ), + 'term' => new Term( 'en', 'Foo' ), + 'termtype' => 'label', + ), + ), + ), + 'Q555 Ta en-ca with fallback aliases only' => array( + $this->newTermSearchInteractor( false, true, 5000 ), + array( 'Ta', 'en-ca', 'item', $allTermTypes ), + array( + array( + 'entityId' => new ItemId( 'Q555' ), + 'term' => new Term( 'en-ca', 'TAAA' ), + 'termtype' => 'alias', + ), + ), + ), + 'P22&P44 La en-ca with fallback all terms' => array( + $this->newTermSearchInteractor( true, true, 5000 ), + array( 'La', 'en-ca', 'property', $allTermTypes ), + array( + array( + 
'entityId' => new PropertyId( 'P22' ), + 'term' => new Term( 'en-ca', 'Lama' ), + 'termtype' => 'label', + ), + array( + 'entityId' => new PropertyId( 'P44' ), + 'term' => new Term( 'en', 'Lama' ), + 'termtype' => 'label' , + ), + ), + ), + ); + } + + /** + * @dataProvider provideSearchForEntitiesTest + * + * @param TermIndexSearchInteractor $interactor + * @param array $params + * @param array[] $expectedTermsDetails each element has a 'term', 'termtype' and a 'entityId' key + */ + public function testSearchForEntities_returnsExpectedResults( $interactor, $params, $expectedTermsDetails ) { + // $interactor->searchForEntities() call + $results = call_user_func_array( array( $interactor, 'searchForEntities' ), $params ); + + $this->assertCount( + count( $expectedTermsDetails ), + $results, + 'Incorrect number of search results' + ); + + foreach ( $results as $key => $result ) { + $expectedTermDetails = $expectedTermsDetails[$key]; + + /** @var EntityId $expectedEntityId */ + $expectedEntityId = $expectedTermDetails['entityId']; + $this->assertTrue( $expectedEntityId->equals( $result['entityId'] ) ); + + /** @var Term $resultMatchedTerm */ + $resultMatchedTerm = $result['matchedTerm']; + /** @var Term $expectedTerm */ + $expectedTerm = $expectedTermDetails['term']; + $this->assertEquals( $expectedTerm, $resultMatchedTerm ); + + $resultMatchedTermType = $result['matchedTermType']; + $expectedTermType = $expectedTermDetails['termtype']; + $this->assertEquals( $expectedTermType, $resultMatchedTermType ); + + // These are mocked + $expectedDisplayTerms = array( + TermIndexEntry::TYPE_LABEL => $this->getDisplayTerm( + $expectedEntityId, + TermIndexEntry::TYPE_LABEL + ), + TermIndexEntry::TYPE_DESCRIPTION => $this->getDisplayTerm( + $expectedEntityId, + TermIndexEntry::TYPE_DESCRIPTION + ), + ); + $this->assertEquals( $expectedDisplayTerms, $result['displayTerms'] ); + } + } + + public function provideLimitInputAndExpected() { + return array( + array( 1, 1 ), + array( 
5000, 5000 ), + array( 999999, 5000 ), + ); + } + + /** + * @dataProvider provideLimitInputAndExpected + */ + public function testSetLimit( $input, $expected ) { + $interactor = $this->newTermSearchInteractor(); + $interactor->setLimit( $input ); + $this->assertEquals( $expected, $interactor->getLimit() ); + } + + public function provideBooleanOptions() { + return array( + array( true ), + array( false ), + ); + } + + /** + * @dataProvider provideBooleanOptions + */ + public function testSetIsCaseSensitive( $booleanValue ) { + $interactor = $this->newTermSearchInteractor(); + $interactor->setIsCaseSensitive( $booleanValue ); + $this->assertEquals( $booleanValue, $interactor->getIsCaseSensitive() ); + } + + /** + * @dataProvider provideBooleanOptions + */ + public function testSetIsprefixSearch( $booleanValue ) { + $interactor = $this->newTermSearchInteractor(); + $interactor->setIsPrefixSearch( $booleanValue ); + $this->assertEquals( $booleanValue, $interactor->getIsPrefixSearch() ); + } + +} -- To view, visit https://gerrit.wikimedia.org/r/218607 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I06714ad2d793e7927a2eb07715e8853a2c170715 Gerrit-PatchSet: 33 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Addshore <addshorew...@gmail.com> Gerrit-Reviewer: Addshore <addshorew...@gmail.com> Gerrit-Reviewer: Aude <aude.w...@gmail.com> Gerrit-Reviewer: Bene <benestar.wikime...@gmail.com> Gerrit-Reviewer: Daniel Kinzler <daniel.kinz...@wikimedia.de> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits