Aude has uploaded a new change for review. https://gerrit.wikimedia.org/r/257291
Change subject: Add label count field to search index ...................................................................... Add label count field to search index For items like Q6581097 (male) and Q6581072 (female), label count is probably also an important consideration. These items have a lot of incoming links, so label count may not be essential for them, but I think we should include it and have it as an option when trying various ways of rescoring search results. The implementation is consistent with the other fields we are introducing (sitelink and statement count). Change-Id: I9b9d5eb69883412627257c9e90688af50af967b5 --- A repo/includes/Search/Elastic/Fields/LabelCountField.php M repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php M repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php A repo/tests/phpunit/includes/Search/Elastic/Fields/LabelCountFieldTest.php M repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php 5 files changed, 90 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/91/257291/1 diff --git a/repo/includes/Search/Elastic/Fields/LabelCountField.php b/repo/includes/Search/Elastic/Fields/LabelCountField.php new file mode 100644 index 0000000..c473071 --- /dev/null +++ b/repo/includes/Search/Elastic/Fields/LabelCountField.php @@ -0,0 +1,42 @@ +<?php + +namespace Wikibase\Repo\Search\Elastic\Fields; + +use Wikibase\DataModel\Entity\EntityDocument; +use Wikibase\DataModel\Term\FingerprintProvider; + +/** + * @since 0.5 + * + * @licence GNU GPL v2+ + * @author Katie Filbert < aude.w...@gmail.com > + */ +class LabelCountField implements SearchIndexField { + + /** + * @see SearchIndexField::getMapping + * + * @return array + */ + public function getMapping() { + return array( + 'type' => 'integer' + ); + } + + /** + * @see SearchIndexField::getFieldData + * + * @param EntityDocument $entity + * + * @return int + */ + public
function getFieldData( EntityDocument $entity ) { + if ( $entity instanceof FingerprintProvider ) { + return $entity->getFingerprint()->getLabels()->count(); + } + + return 0; + } + +} diff --git a/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php b/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php index dade9e9..bd98160 100644 --- a/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php +++ b/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php @@ -9,6 +9,7 @@ */ public function getFields() { $fields = array( + 'label_count' => new LabelCountField(), 'sitelink_count' => new SiteLinkCountField(), 'statement_count' => new StatementCountField() ); diff --git a/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php b/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php index af2acc2..66dfab4 100644 --- a/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php +++ b/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php @@ -50,6 +50,7 @@ $connection ); + $this->assertSame( 1, $document->get( 'label_count' ), 'label_count' ); $this->assertSame( 1, $document->get( 'sitelink_count' ), 'sitelink_count' ); $this->assertSame( 1, $document->get( 'statement_count' ), 'statement_count' ); } @@ -70,7 +71,7 @@ CirrusSearchHookHandlers::onCirrusSearchMappingConfig( $config, $mappingConfigBuilder ); $this->assertSame( - array( 'sitelink_count', 'statement_count' ), + array( 'label_count', 'sitelink_count', 'statement_count' ), array_keys( $config['page']['properties'] ) ); } @@ -84,6 +85,7 @@ $hookHandlers = new CirrusSearchHookHandlers( $fieldDefinitions ); $hookHandlers->indexExtraFields( $document, $content ); + $this->assertSame( 1, $document->get( 'label_count' ), 'label_count' ); $this->assertSame( 1, $document->get( 'sitelink_count' ), 'sitelink_count' ); $this->assertSame( 1, $document->get( 'statement_count' ), 'statement_count' ); } @@ -103,6 +105,9 @@ $expected = array( 
'page' => array( 'properties' => array( + 'label_count' => array( + 'type' => 'integer' + ), 'sitelink_count' => array( 'type' => 'integer' ), @@ -143,6 +148,7 @@ private function getContent() { $item = new Item(); + $item->getFingerprint()->setLabel( 'en', 'Kitten' ); $item->getSiteLinkList()->addNewSiteLink( 'enwiki', 'Kitten' ); $item->getStatements()->addNewStatement( new PropertyNoValueSnak( new PropertyId( 'P1' ) ) diff --git a/repo/tests/phpunit/includes/Search/Elastic/Fields/LabelCountFieldTest.php b/repo/tests/phpunit/includes/Search/Elastic/Fields/LabelCountFieldTest.php new file mode 100644 index 0000000..1192cc9 --- /dev/null +++ b/repo/tests/phpunit/includes/Search/Elastic/Fields/LabelCountFieldTest.php @@ -0,0 +1,39 @@ +<?php + +namespace Wikibase\Test; + +use Wikibase\DataModel\Entity\Item; +use Wikibase\Repo\Search\Elastic\Fields\LabelCountField; + +/** + * @covers Wikibase\Repo\Search\Elastic\Fields\LabelCountField + * + * @group WikibaseElastic + * @group WikibaseRepo + * @group Wikibase + * + * @licence GNU GPL v2+ + * @author Katie Filbert < aude.w...@gmail.com > + */ +class LabelCountFieldTest extends \PHPUnit_Framework_TestCase { + + public function testGetMapping() { + $labelCountField = new LabelCountField(); + + $expected = array( + 'type' => 'integer' + ); + + $this->assertSame( $expected, $labelCountField->getMapping() ); + } + + public function testGetFieldData() { + $labelCountField = new LabelCountField(); + + $item = new Item(); + $item->getFingerprint()->setLabel( 'es', 'Gato' ); + + $this->assertSame( 1, $labelCountField->getFieldData( $item ) ); + } + +} diff --git a/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php b/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php index d7830f5..7b8a3ae 100644 --- a/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php +++ 
b/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php @@ -20,7 +20,7 @@ $wikibaseFieldDefinitions = new WikibaseFieldDefinitions(); $fields = $wikibaseFieldDefinitions->getFields(); - $expectedFieldNames = array( 'sitelink_count', 'statement_count' ); + $expectedFieldNames = array( 'label_count', 'sitelink_count', 'statement_count' ); $this->assertSame( $expectedFieldNames, array_keys( $fields ) ); } -- To view, visit https://gerrit.wikimedia.org/r/257291 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I9b9d5eb69883412627257c9e90688af50af967b5 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Aude <aude.w...@gmail.com> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits