EBernhardson has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/286206

Change subject: Do a better job selecting which snippet to show
......................................................................

Do a better job selecting which snippet to show

Change-Id: If563b6d858a3f33c7f5a7f6e10ead6b55c97c077
---
M schema.mysql.sql
M src/RelevanceScoring/Import/ImportedResult.php
M src/RelevanceScoring/Repository/ResultsRepository.php
3 files changed, 36 insertions(+), 7 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/discernatron refs/changes/06/286206/1

diff --git a/schema.mysql.sql b/schema.mysql.sql
index eee149f..5695852 100644
--- a/schema.mysql.sql
+++ b/schema.mysql.sql
@@ -25,15 +25,19 @@
 );
 CREATE TABLE IF NOT EXISTS `results_sources` (
     id INTEGER UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
+    query_id INTEGER UNSIGNED NOT NULL,
     results_id INTEGER UNSIGNED NOT NULL,
     user_id INTEGER UNSIGNED NOT NULL,
     source VARCHAR(32) NOT NULL,
     position TINYINT UNSIGNED NOT NULL,
     snippet TEXT NOT NULL,
+    snippet_score TINYINT UNSIGNED NOT NULL,
     created INTEGER UNSIGNED NOT NULL,
-    FOREIGN KEY `results_user_id` (`user_id`) REFERENCES `users`(`id`),
+    FOREIGN KEY `results_source_query_id` (`query_id`) REFERENCES `queries`(`id`),
+    FOREIGN KEY `results_source_user_id` (`user_id`) REFERENCES `users`(`id`),
     FOREIGN KEY `results_source_results_id` (`results_id`) REFERENCES `results`(`id`),
-    UNIQUE KEY `results_source_results_id_source` (`results_id`, `source`)
+    UNIQUE KEY `results_source_results_id_source` (`results_id`, `source`),
+    KEY `results_sources_snippet_order` (`query_id`, `results_id`, `snippet_score`)
 );
 CREATE TABLE IF NOT EXISTS scores (
     id INTEGER UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
diff --git a/src/RelevanceScoring/Import/ImportedResult.php b/src/RelevanceScoring/Import/ImportedResult.php
index abaf784..9603a48 100644
--- a/src/RelevanceScoring/Import/ImportedResult.php
+++ b/src/RelevanceScoring/Import/ImportedResult.php
@@ -15,7 +15,7 @@
 
     /**
      * ImportedResult constructor.
-     * 
+     *
      * @param string $source
      * @param string $title
      * @param string $snippet
@@ -63,4 +63,21 @@
     {
         return $this->position;
     }
+
+    public function getSnippetScore()
+    {
+        if (strlen($this->snippet) === 0) {
+            return 0;
+        }
+        switch($this->source) {
+        case 'google':
+            return 100;
+        case 'bing':
+            return 80;
+        case 'ddg':
+            return 50;
+        default:
+            return 10;
+        }
+    }
 }
diff --git a/src/RelevanceScoring/Repository/ResultsRepository.php b/src/RelevanceScoring/Repository/ResultsRepository.php
index f7d6519..d5309a3 100644
--- a/src/RelevanceScoring/Repository/ResultsRepository.php
+++ b/src/RelevanceScoring/Repository/ResultsRepository.php
@@ -105,18 +105,24 @@
      */
     public function getQueryResults($queryId)
     {
-        // @todo this takes a randomly selected snippet. It might be better if
-        // we could rank snippets and take "the best".
         $sql = <<<EOD
 SELECT r.id, r.title, r_s.snippet
   FROM results r
+  JOIN (SELECT results_id, MAX(snippet_score) as snippet_score
+          FROM results_sources
+         WHERE query_id = ?
+         GROUP BY results_id
+       ) r_s_max
+    ON r.id = r_s_max.results_id
   JOIN results_sources r_s
-    ON r_s.results_id = r.id
+    ON r_s.results_id = r_s_max.results_id
+   AND r_s.snippet_score = r_s_max.snippet_score
+   AND r_s.query_id = ?
  WHERE r.query_id = ?
  GROUP BY r.id
  ORDER BY r.id DESC
 EOD;
-        $results = $this->db->fetchAll($sql, [$queryId]);
+        $results = $this->db->fetchAll($sql, [$queryId, $queryId, $queryId]);
         if ($results === false) {
             return new None();
         }
@@ -160,10 +166,12 @@
         foreach ($results as $result) {
             echo "Inserting {$result->getSource()}: {$resultIds[$result->getTitle()]} {$result->getTitle()}\n";
             $affected = $this->db->insert('results_sources', [
+                'query_id' => $queryId,
                 'results_id' => $resultIds[$result->getTitle()],
                 'user_id' => $userId,
                 'source' => $result->getSource(),
                 'snippet' => $result->getSnippet(),
+                'snippet_score' => $result->getSnippetScore(),
                 'position' => $result->getPosition(),
                 'created' => $now,
             ]);

-- 
To view, visit https://gerrit.wikimedia.org/r/286206
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If563b6d858a3f33c7f5a7f6e10ead6b55c97c077
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/discernatron
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to