Aude has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/257291

Change subject: Add label count field to search index
......................................................................

Add label count field to search index

For items like Q6581097 (male) and Q6581072 (female),
label count is probably also an important consideration.

These items already have a lot of incoming links, so label
count may not turn out to be essential. Still, I think we
should include it and have it as an option when trying
various ways of rescoring search results.

The implementation is consistent with the other fields
we are introducing (sitelink and statement count).

Change-Id: I9b9d5eb69883412627257c9e90688af50af967b5
---
A repo/includes/Search/Elastic/Fields/LabelCountField.php
M repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php
M repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php
A repo/tests/phpunit/includes/Search/Elastic/Fields/LabelCountFieldTest.php
M repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php
5 files changed, 90 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/91/257291/1

diff --git a/repo/includes/Search/Elastic/Fields/LabelCountField.php 
b/repo/includes/Search/Elastic/Fields/LabelCountField.php
new file mode 100644
index 0000000..c473071
--- /dev/null
+++ b/repo/includes/Search/Elastic/Fields/LabelCountField.php
@@ -0,0 +1,42 @@
+<?php
+
+namespace Wikibase\Repo\Search\Elastic\Fields;
+
+use Wikibase\DataModel\Entity\EntityDocument;
+use Wikibase\DataModel\Term\FingerprintProvider;
+
+/**
+ * @since 0.5
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < aude.w...@gmail.com >
+ */
+class LabelCountField implements SearchIndexField {
+
+       /**
+        * @see SearchIndexField::getMapping
+        *
+        * @return array
+        */
+       public function getMapping() {
+               return array(
+                       'type' => 'integer'
+               );
+       }
+
+       /**
+        * @see SearchIndexField::getFieldData
+        *
+        * @param EntityDocument $entity
+        *
+        * @return int
+        */
+       public function getFieldData( EntityDocument $entity ) {
+               if ( $entity instanceof FingerprintProvider ) {
+                       return $entity->getFingerprint()->getLabels()->count();
+               }
+
+               return 0;
+       }
+
+}
diff --git a/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php 
b/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php
index dade9e9..bd98160 100644
--- a/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php
+++ b/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php
@@ -9,6 +9,7 @@
         */
        public function getFields() {
                $fields = array(
+                       'label_count' => new LabelCountField(),
                        'sitelink_count' => new SiteLinkCountField(),
                        'statement_count' => new StatementCountField()
                );
diff --git a/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php 
b/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php
index af2acc2..66dfab4 100644
--- a/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php
+++ b/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php
@@ -50,6 +50,7 @@
                        $connection
                );
 
+               $this->assertSame( 1, $document->get( 'label_count' ), 
'label_count' );
                $this->assertSame( 1, $document->get( 'sitelink_count' ), 
'sitelink_count' );
                $this->assertSame( 1, $document->get( 'statement_count' ), 
'statement_count' );
        }
@@ -70,7 +71,7 @@
                CirrusSearchHookHandlers::onCirrusSearchMappingConfig( $config, 
$mappingConfigBuilder );
 
                $this->assertSame(
-                       array( 'sitelink_count', 'statement_count' ),
+                       array( 'label_count', 'sitelink_count', 
'statement_count' ),
                        array_keys( $config['page']['properties'] )
                );
        }
@@ -84,6 +85,7 @@
                $hookHandlers = new CirrusSearchHookHandlers( $fieldDefinitions 
);
                $hookHandlers->indexExtraFields( $document, $content );
 
+               $this->assertSame( 1, $document->get( 'label_count' ), 
'label_count' );
                $this->assertSame( 1, $document->get( 'sitelink_count' ), 
'sitelink_count' );
                $this->assertSame( 1, $document->get( 'statement_count' ), 
'statement_count' );
        }
@@ -103,6 +105,9 @@
                $expected = array(
                        'page' => array(
                                'properties' => array(
+                                       'label_count' => array(
+                                               'type' => 'integer'
+                                       ),
                                        'sitelink_count' => array(
                                                'type' => 'integer'
                                        ),
@@ -143,6 +148,7 @@
 
        private function getContent() {
                $item = new Item();
+               $item->getFingerprint()->setLabel( 'en', 'Kitten' );
                $item->getSiteLinkList()->addNewSiteLink( 'enwiki', 'Kitten' );
                $item->getStatements()->addNewStatement(
                        new PropertyNoValueSnak( new PropertyId( 'P1' ) )
diff --git 
a/repo/tests/phpunit/includes/Search/Elastic/Fields/LabelCountFieldTest.php 
b/repo/tests/phpunit/includes/Search/Elastic/Fields/LabelCountFieldTest.php
new file mode 100644
index 0000000..1192cc9
--- /dev/null
+++ b/repo/tests/phpunit/includes/Search/Elastic/Fields/LabelCountFieldTest.php
@@ -0,0 +1,39 @@
+<?php
+
+namespace Wikibase\Test;
+
+use Wikibase\DataModel\Entity\Item;
+use Wikibase\Repo\Search\Elastic\Fields\LabelCountField;
+
+/**
+ * @covers Wikibase\Repo\Search\Elastic\Fields\LabelCountField
+ *
+ * @group WikibaseElastic
+ * @group WikibaseRepo
+ * @group Wikibase
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < aude.w...@gmail.com >
+ */
+class LabelCountFieldTest extends \PHPUnit_Framework_TestCase {
+
+       public function testGetMapping() {
+               $labelCountField = new LabelCountField();
+
+               $expected = array(
+                       'type' => 'integer'
+               );
+
+               $this->assertSame( $expected, $labelCountField->getMapping() );
+       }
+
+       public function testGetFieldData() {
+               $labelCountField = new LabelCountField();
+
+               $item = new Item();
+               $item->getFingerprint()->setLabel( 'es', 'Gato' );
+
+               $this->assertSame( 1, $labelCountField->getFieldData( $item ) );
+       }
+
+}
diff --git 
a/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php
 
b/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php
index d7830f5..7b8a3ae 100644
--- 
a/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php
+++ 
b/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php
@@ -20,7 +20,7 @@
                $wikibaseFieldDefinitions = new WikibaseFieldDefinitions();
                $fields = $wikibaseFieldDefinitions->getFields();
 
-               $expectedFieldNames = array( 'sitelink_count', 
'statement_count' );
+               $expectedFieldNames = array( 'label_count', 'sitelink_count', 
'statement_count' );
 
                $this->assertSame( $expectedFieldNames, array_keys( $fields ) );
        }

-- 
To view, visit https://gerrit.wikimedia.org/r/257291
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I9b9d5eb69883412627257c9e90688af50af967b5
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Aude <aude.w...@gmail.com>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to