Aude has uploaded a new change for review. https://gerrit.wikimedia.org/r/256023
Change subject: Introduce hook handlers for CirrusSearch [WIP] ...................................................................... Introduce hook handlers for CirrusSearch [WIP] We use the CirrusSearchMappingConfig and the CirrusSearchBuildDocumentParse hooks to add fields that contain a count of site links and count of statements. These fields can potentially be considered when ranking search results and boost some items with a high number of site links and/or statements. needs more tests... Change-Id: I34cce7281d10cc1c4176ebed6b9a3b90e10fe1da --- M repo/Wikibase.php A repo/includes/Hooks/BuildDocumentParseHookHandler.php A repo/includes/Hooks/MappingConfigHookHandler.php A repo/includes/Search/Fields/Field.php A repo/includes/Search/Fields/SiteLinkCountField.php A repo/includes/Search/Fields/StatementCountField.php A repo/includes/Search/Fields/WikibaseFieldsDefinition.php A repo/tests/phpunit/includes/Search/Fields/SiteLinkCountFieldTest.php A repo/tests/phpunit/includes/Search/Fields/StatementCountFieldTest.php 9 files changed, 343 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/23/256023/2 diff --git a/repo/Wikibase.php b/repo/Wikibase.php index a9202ca..8266469 100644 --- a/repo/Wikibase.php +++ b/repo/Wikibase.php @@ -243,6 +243,10 @@ $wgHooks['SkinMinervaDefaultModules'][] = 'Wikibase\RepoHooks::onSkinMinervaDefaultModules'; $wgHooks['ResourceLoaderRegisterModules'][] = 'Wikibase\RepoHooks::onResourceLoaderRegisterModules'; + // CirrusSearch hooks + $wgHooks['CirrusSearchMappingConfig'][] = 'Wikibase\Repo\Hooks\MappingConfigHookHandler::onCirrusSearchMappingConfig'; + $wgHooks['CirrusSearchBuildDocumentParse'][] = 'Wikibase\Repo\Hooks\BuildDocumentParseHookHandler::onCirrusSearchBuildDocumentParse'; + // update hooks $wgHooks['LoadExtensionSchemaUpdates'][] = '\Wikibase\Repo\Store\Sql\ChangesSubscriptionSchemaUpdater::onSchemaUpdate'; diff --git 
a/repo/includes/Hooks/BuildDocumentParseHookHandler.php b/repo/includes/Hooks/BuildDocumentParseHookHandler.php new file mode 100644 index 0000000..b661e4e --- /dev/null +++ b/repo/includes/Hooks/BuildDocumentParseHookHandler.php @@ -0,0 +1,79 @@ +<?php + +namespace Wikibase\Repo\Hooks; + +use CirrusSearch\Connection; +use Content; +use Elastica\Document; +use ParserOutput; +use Title; +use Wikibase\EntityContent; +use Wikibase\Repo\Search\Fields\WikibaseFieldsDefinition; + +/** + * Extension hooks + */ +class BuildDocumentParseHookHandler { + + /** + * @var WikibaseFieldsDefinition + */ + private $fieldsDefinition; + + /** + * @param Document $document + * @param Title $title + * @param Content $content + * @param ParserOutput $parserOutput + * @param Connection $connection + * + * @return bool + */ + public static function onCirrusSearchBuildDocumentParse( + Document $document, + Title $title, + Content $content, + ParserOutput $parserOutput, + Connection $connection + ) { + $hookHandler = self::newFromGlobalState(); + $hookHandler->indexExtraFields( $document, $content ); + + return true; + } + + /** + * @return BuildDocumentParseHookHandler + */ + public static function newFromGlobalState() { + return new self( + new WikibaseFieldsDefinition() + ); + } + + /** + * @param WikibaseFieldsDefinition $fieldsDefinition + */ + public function __construct( WikibaseFieldsDefinition $fieldsDefinition ) { + $this->fieldsDefinition = $fieldsDefinition; + } + + /** + * @param Document $document + * @param Content $content + */ + public function indexExtraFields( Document $document, Content $content ) { + if ( !$content instanceof EntityContent || $content->isRedirect() === true ) { + return; + } + + $fields = $this->fieldsDefinition->getFields(); + $entity = $content->getEntity(); + + foreach ( $fields as $fieldName => $field ) { + $data = $field->buildData( $entity ); + $document->set( $fieldName, $data ); + } + } + +} diff --git 
a/repo/includes/Hooks/MappingConfigHookHandler.php b/repo/includes/Hooks/MappingConfigHookHandler.php new file mode 100644 index 0000000..4c8a09e --- /dev/null +++ b/repo/includes/Hooks/MappingConfigHookHandler.php @@ -0,0 +1,55 @@ +<?php + +namespace Wikibase\Repo\Hooks; + +use CirrusSearch\Maintenance\MappingConfigBuilder; +use Wikibase\Repo\Search\Fields\WikibaseFieldsDefinition; + +class MappingConfigHookHandler { + + /** + * @var WikibaseFieldsDefinition + */ + private $fieldsDefinition; + + /** + * @param array &$config + * @param MappingConfigBuilder $mappingConfigBuilder + * + * @return bool + */ + public static function onCirrusSearchMappingConfig( + array &$config, + MappingConfigBuilder $mappingConfigBuilder + ) { + $handler = self::newFromGlobalState(); + $handler->addExtraFields( $config ); + + return true; + } + + private static function newFromGlobalState() { + return new self( + new WikibaseFieldsDefinition() + ); + } + + /** + * @param WikibaseFieldsDefinition $fieldsDefinition + */ + public function __construct( WikibaseFieldsDefinition $fieldsDefinition ) { + $this->fieldsDefinition = $fieldsDefinition; + } + + /** + * @param array &$config + */ + public function addExtraFields( array &$config ) { + $fields = $this->fieldsDefinition->getFields(); + + foreach ( $fields as $fieldName => $field ) { + $config['page']['properties'][$fieldName] = $field->getMapping(); + } + } + +} diff --git a/repo/includes/Search/Fields/Field.php b/repo/includes/Search/Fields/Field.php new file mode 100644 index 0000000..f98c6ad --- /dev/null +++ b/repo/includes/Search/Fields/Field.php @@ -0,0 +1,22 @@ +<?php + +namespace Wikibase\Repo\Search\Fields; + +use Wikibase\DataModel\Entity\EntityDocument; + +interface Field { + + /** + * @return array + */ + public function getMapping(); + + /** + * @param EntityDocument $entity + * + * @return mixed Either an array with nested data, or + * an int or string for simple field types. 
+ */ + public function buildData( EntityDocument $entity ); + +} diff --git a/repo/includes/Search/Fields/SiteLinkCountField.php b/repo/includes/Search/Fields/SiteLinkCountField.php new file mode 100644 index 0000000..081558f --- /dev/null +++ b/repo/includes/Search/Fields/SiteLinkCountField.php @@ -0,0 +1,30 @@ +<?php + +namespace Wikibase\Repo\Search\Fields; + +use Wikibase\DataModel\Entity\EntityDocument; +use Wikibase\DataModel\Entity\Item; + +class SiteLinkCountField implements Field { + + /** + * @return array + */ + public function getMapping() { + return array( + 'type' => 'long' + ); + } + + /** + * @see Field::buildData + */ + public function buildData( EntityDocument $entity ) { + if ( $entity instanceof Item ) { + return $entity->getSiteLinkList()->count(); + } + + return 0; + } + +} diff --git a/repo/includes/Search/Fields/StatementCountField.php b/repo/includes/Search/Fields/StatementCountField.php new file mode 100644 index 0000000..9d5986b --- /dev/null +++ b/repo/includes/Search/Fields/StatementCountField.php @@ -0,0 +1,30 @@ +<?php + +namespace Wikibase\Repo\Search\Fields; + +use Wikibase\DataModel\Entity\EntityDocument; +use Wikibase\DataModel\Statement\StatementListHolder; + +class StatementCountField implements Field { + + /** + * @return array + */ + public function getMapping() { + return array( + 'type' => 'long' + ); + } + + /** + * @see Field::buildData + */ + public function buildData( EntityDocument $entity ) { + if ( $entity instanceof StatementListHolder ) { + return $entity->getStatements()->count(); + } + + return 0; + } + +} diff --git a/repo/includes/Search/Fields/WikibaseFieldsDefinition.php b/repo/includes/Search/Fields/WikibaseFieldsDefinition.php new file mode 100644 index 0000000..eaeccf6 --- /dev/null +++ b/repo/includes/Search/Fields/WikibaseFieldsDefinition.php @@ -0,0 +1,23 @@ +<?php + +namespace Wikibase\Repo\Search\Fields; + +use Hooks; + +class WikibaseFieldsDefinition { + + /** + * @return Field[] Array key is field 
name. + */ + public function getFields() { + $fields = array( + 'sitelink_count' => new SiteLinkCountField(), + 'statement_count' => new StatementCountField() + ); + + Hooks::run( 'WikibaseSearchFields', array( &$fields ) ); + + return $fields; + } + +} diff --git a/repo/tests/phpunit/includes/Search/Fields/SiteLinkCountFieldTest.php b/repo/tests/phpunit/includes/Search/Fields/SiteLinkCountFieldTest.php new file mode 100644 index 0000000..24d8c6c --- /dev/null +++ b/repo/tests/phpunit/includes/Search/Fields/SiteLinkCountFieldTest.php @@ -0,0 +1,52 @@ +<?php + +namespace Wikibase\Test; + +use Wikibase\DataModel\Entity\Item; +use Wikibase\DataModel\Entity\Property; +use Wikibase\Repo\Search\Fields\SiteLinkCountField; + +/** + * @covers Wikibase\Repo\Search\Fields\SiteLinkCountField + * + * @group WikibaseRepo + * @group WikibaseSearch + * @group Wikibase + * + * @licence GNU GPL v2+ + * @author Katie Filbert < aude.w...@gmail.com > + */ +class SiteLinkCountFieldTest extends \PHPUnit_Framework_TestCase { + + public function testGetMapping() { + $siteLinkCountField = new SiteLinkCountField(); + + $expected = array( + 'type' => 'long' + ); + + $this->assertSame( $expected, $siteLinkCountField->getMapping() ); + } + + /** + * @dataProvider buildDataProvider + */ + public function testBuildData( $expected, $entity ) { + $siteLinkCountField = new SiteLinkCountField(); + + $this->assertSame( $expected, $siteLinkCountField->buildData( $entity ) ); + } + + public function buildDataProvider() { + $item = new Item(); + + $item->getSiteLinkList()->addNewSiteLink( 'enwiki', 'Kitten' ); + $item->getSiteLinkList()->addNewSiteLink( 'eswiki', 'Gato' ); + + return array( + array( 2, $item ), + array( 0, Property::newFromType( 'string' ) ) + ); + } + +} diff --git a/repo/tests/phpunit/includes/Search/Fields/StatementCountFieldTest.php b/repo/tests/phpunit/includes/Search/Fields/StatementCountFieldTest.php new file mode 100644 index 0000000..b08b8f9 --- /dev/null +++ 
b/repo/tests/phpunit/includes/Search/Fields/StatementCountFieldTest.php @@ -0,0 +1,48 @@ +<?php + +namespace Wikibase\Test; + +use DataValues\StringValue; +use Wikibase\DataModel\Entity\Item; +use Wikibase\DataModel\Entity\PropertyId; +use Wikibase\DataModel\Snak\PropertyValueSnak; +use Wikibase\DataModel\Statement\StatementList; +use Wikibase\Repo\Search\Fields\StatementCountField; + +/** + * @covers Wikibase\Repo\Search\Fields\StatementCountField + * + * @group WikibaseRepo + * @group WikibaseSearch + * @group Wikibase + * + * @licence GNU GPL v2+ + * @author Katie Filbert < aude.w...@gmail.com > + */ +class StatementCountFieldTest extends \PHPUnit_Framework_TestCase { + + public function testGetMapping() { + $statementCountField = new StatementCountField(); + + $expected = array( + 'type' => 'long' + ); + + $this->assertSame( $expected, $statementCountField->getMapping() ); + } + + public function testBuildData() { + $statementCountField = new StatementCountField(); + + $statements = new StatementList(); + $statements->addNewStatement( + new PropertyValueSnak( new PropertyId( 'P1' ), new StringValue( 'o_O' ) ) + ); + + $item = new Item(); + $item->setStatements( $statements ); + + $this->assertSame( 1, $statementCountField->buildData( $item ) ); + } + +} -- To view, visit https://gerrit.wikimedia.org/r/256023 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I34cce7281d10cc1c4176ebed6b9a3b90e10fe1da Gerrit-PatchSet: 2 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Aude <aude.w...@gmail.com> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits