EBernhardson has uploaded a new change for review. https://gerrit.wikimedia.org/r/286206
Change subject: Do a better job selecting which snippet to show ...................................................................... Do a better job selecting which snippet to show Change-Id: If563b6d858a3f33c7f5a7f6e10ead6b55c97c077 --- M schema.mysql.sql M src/RelevanceScoring/Import/ImportedResult.php M src/RelevanceScoring/Repository/ResultsRepository.php 3 files changed, 36 insertions(+), 7 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/discernatron refs/changes/06/286206/1 diff --git a/schema.mysql.sql b/schema.mysql.sql index eee149f..5695852 100644 --- a/schema.mysql.sql +++ b/schema.mysql.sql @@ -25,15 +25,19 @@ ); CREATE TABLE IF NOT EXISTS `results_sources` ( id INTEGER UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + query_id INTEGER UNSIGNED NOT NULL, results_id INTEGER UNSIGNED NOT NULL, user_id INTEGER UNSIGNED NOT NULL, source VARCHAR(32) NOT NULL, position TINYINT UNSIGNED NOT NULL, snippet TEXT NOT NULL, + snippet_score TINYINT UNSIGNED NOT NULL, created INTEGER UNSIGNED NOT NULL, - FOREIGN KEY `results_user_id` (`user_id`) REFERENCES `users`(`id`), + FOREIGN KEY `results_source_query_id` (`query_id`) REFERENCES `queries`(`id`), + FOREIGN KEY `results_source_user_id` (`user_id`) REFERENCES `users`(`id`), FOREIGN KEY `results_source_results_id` (`results_id`) REFERENCES `results`(`id`), - UNIQUE KEY `results_source_results_id_source` (`results_id`, `source`) + UNIQUE KEY `results_source_results_id_source` (`results_id`, `source`), + KEY `results_sources_snippet_order` (`query_id`, `results_id`, `snippet_score`) ); CREATE TABLE IF NOT EXISTS scores ( id INTEGER UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, diff --git a/src/RelevanceScoring/Import/ImportedResult.php b/src/RelevanceScoring/Import/ImportedResult.php index abaf784..9603a48 100644 --- a/src/RelevanceScoring/Import/ImportedResult.php +++ b/src/RelevanceScoring/Import/ImportedResult.php @@ -15,7 +15,7 @@ /** * ImportedResult constructor. - * + * * @param string $source * @param string $title * @param string $snippet @@ -63,4 +63,21 @@ { return $this->position; } + + public function getSnippetScore() + { + if (strlen($this->snippet) === 0) { + return 0; + } + switch($this->source) { + case 'google': + return 100; + case 'bing': + return 80; + case 'ddg': + return 50; + default: + return 10; + } + } } diff --git a/src/RelevanceScoring/Repository/ResultsRepository.php b/src/RelevanceScoring/Repository/ResultsRepository.php index f7d6519..d5309a3 100644 --- a/src/RelevanceScoring/Repository/ResultsRepository.php +++ b/src/RelevanceScoring/Repository/ResultsRepository.php @@ -105,18 +105,24 @@ */ public function getQueryResults($queryId) { - // @todo this takes a randomly selected snippet. It might be better if - // we could rank snippets and take "the best". $sql = <<<EOD SELECT r.id, r.title, r_s.snippet FROM results r + JOIN (SELECT results_id, MAX(snippet_score) as snippet_score + FROM results_sources + WHERE query_id = ? + GROUP BY results_id + ) r_s_max + ON r.id = r_s_max.results_id JOIN results_sources r_s - ON r_s.results_id = r.id + ON r_s.results_id = r_s_max.results_id + AND r_s.snippet_score = r_s_max.snippet_score + AND r_s.query_id = ? WHERE r.query_id = ? GROUP BY r.id ORDER BY r.id DESC EOD; - $results = $this->db->fetchAll($sql, [$queryId]); + $results = $this->db->fetchAll($sql, [$queryId, $queryId, $queryId]); if ($results === false) { return new None(); } @@ -160,10 +166,12 @@ foreach ($results as $result) { echo "Inserting {$result->getSource()}: {$resultIds[$result->getTitle()]} {$result->getTitle()}\n"; $affected = $this->db->insert('results_sources', [ + 'query_id' => $queryId, 'results_id' => $resultIds[$result->getTitle()], 'user_id' => $userId, 'source' => $result->getSource(), 'snippet' => $result->getSnippet(), + 'snippet_score' => $result->getSnippetScore(), 'position' => $result->getPosition(), 'created' => $now, ]); -- To view, visit https://gerrit.wikimedia.org/r/286206 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: If563b6d858a3f33c7f5a7f6e10ead6b55c97c077 Gerrit-PatchSet: 1 Gerrit-Project: wikimedia/discovery/discernatron Gerrit-Branch: master Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits