jenkins-bot has submitted this change and it was merged. Change subject: Use max( |sitelinks|, |labels| ) for term weight. ......................................................................
Use max( |sitelinks|, |labels| ) for term weight. Taking the number of labels into account for the term weight boosts the ranking of items that have few or no sitelinks but many labels. This can be the case for "structural" items like Q6581097, which have no corresponding Wikipedia pages. Bug: T94404 Change-Id: I17ad13eb0496e5c90cf6c47749aa5523c1954728 --- M lib/includes/store/sql/TermSqlIndex.php M repo/tests/phpunit/includes/store/sql/TermSqlIndexTest.php 2 files changed, 13 insertions(+), 7 deletions(-) Approvals: Hoo man: Looks good to me, but someone else must approve Aude: Looks good to me, approved Thiemo Mättig (WMDE): Looks good to me, but someone else must approve jenkins-bot: Verified diff --git a/lib/includes/store/sql/TermSqlIndex.php b/lib/includes/store/sql/TermSqlIndex.php index 535d2ec..8c3cae1 100644 --- a/lib/includes/store/sql/TermSqlIndex.php +++ b/lib/includes/store/sql/TermSqlIndex.php @@ -288,8 +288,7 @@ /** * Calculate a weight the given entity to be used for ranking. Should be normalized * between 0 and 1, but that's not a strong constraint. - * This implementation relies on sitelinks, and simply takes the number of sitelinks - * as the weight. + * This implementation uses the max of the number of labels and the number of sitelinks. * * TODO Should be moved to its own object and be added via dependency injection * @@ -300,11 +299,17 @@ private function getWeight( EntityDocument $entity ) { // FIXME: OCP violation. 
No support for new types of entities can be registered - if ( $entity instanceof Item ) { - return $entity->getSiteLinkList()->count() / 1000.0; + $weight = 0.0; + + if ( $entity instanceof FingerprintProvider ) { + $weight = max( $weight, $entity->getFingerprint()->getLabels()->count() / 1000.0 ); } - return 0.0; + if ( $entity instanceof Item ) { + $weight = max( $weight, $entity->getSiteLinkList()->count() / 1000.0 ); + } + + return $weight; } /** diff --git a/repo/tests/phpunit/includes/store/sql/TermSqlIndexTest.php b/repo/tests/phpunit/includes/store/sql/TermSqlIndexTest.php index b6c437a..ad7752e 100644 --- a/repo/tests/phpunit/includes/store/sql/TermSqlIndexTest.php +++ b/repo/tests/phpunit/includes/store/sql/TermSqlIndexTest.php @@ -200,10 +200,11 @@ $termIndex->saveTermsOfEntity( $item2 ); + // The number of labels counts too $item3 = new Item( new ItemId( 'Q108' ) ); $item3->setLabel( $languageCode, $termText ); - $item3->getSiteLinkList()->addNewSiteLink( 'hrwiki', 'C' ); - $item3->getSiteLinkList()->addNewSiteLink( 'uzwiki', 'C' ); + $item3->setLabel( 'qxy', $termText ); + $item3->setLabel( 'qxz', $termText ); $termIndex->saveTermsOfEntity( $item3 ); -- To view, visit https://gerrit.wikimedia.org/r/202456 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I17ad13eb0496e5c90cf6c47749aa5523c1954728 Gerrit-PatchSet: 2 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Daniel Kinzler <daniel.kinz...@wikimedia.de> Gerrit-Reviewer: Aude <aude.w...@gmail.com> Gerrit-Reviewer: Hoo man <h...@online.de> Gerrit-Reviewer: Thiemo Mättig (WMDE) <thiemo.maet...@wikimedia.de> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits