EBernhardson has submitted this change and it was merged. Change subject: Using a queue for what gets scored next ......................................................................
Using a queue for what gets scored next Randomized scoring seems sub-optimal. The problem is that we can't get the desired number of queries scored and can end up with some queries scored many more times than others. By using a queue we have much more control over how many times each query gets scored. Gives queue items a priority such that we will score everything once, then everything a second time, until the queue is empty. Change-Id: I84f70674100b7a1cf37c8db7051298c523241a52 --- M app.php M schema.mysql.sql A src/RelevanceScoring/Console/UpdateScoringQueue.php M src/RelevanceScoring/Controller/QueriesController.php M src/RelevanceScoring/Import/Importer.php M src/RelevanceScoring/RelevanceScoringProvider.php M src/RelevanceScoring/Repository/QueriesRepository.php M src/RelevanceScoring/Repository/ScoresRepository.php A src/RelevanceScoring/Repository/ScoringQueueRepository.php A src/RelevanceScoring/Util/Calendar.php M tests/unit/RelevanceScoring/Import/HtmlResultGetterTest.php M tests/unit/RelevanceScoring/Repository/BaseRepositoryTest.php M tests/unit/RelevanceScoring/Repository/ResultsRepositoryTest.php M tests/unit/RelevanceScoring/Repository/ScoresRepositoryTest.php A tests/unit/RelevanceScoring/Repository/ScoringQueueRepositoryTest.php M views/instructions.twig M views/layout.twig 17 files changed, 873 insertions(+), 35 deletions(-) Approvals: EBernhardson: Verified; Looks good to me, approved diff --git a/app.php b/app.php index dc50c5c..97b3290 100644 --- a/app.php +++ b/app.php @@ -70,6 +70,7 @@ 'search.wikis' => [ 'enwiki' => 'https://en.wikipedia.org/w/api.php', ], + 'search.scores_per_query' => 5, ]); $app->mount('/', $relevanceScoringProvider); @@ -141,11 +142,15 @@ }); $app->get('/', function () use ($app) { - return $app->redirect($app->path('random_query')); + return $app->redirect($app->path('next_query')); }) ->bind('root'); $app->get('/login', function () use ($app) { + if ($app['session']->has('user')) { + return $app->redirect($app->path('next_query')); + } + return $app['twig']->render('splash.twig', [ 'domain' => parse_url($app['oauth.base_url'], PHP_URL_HOST), ]); diff --git a/schema.mysql.sql b/schema.mysql.sql index 4b49596..826d56f 100644 --- a/schema.mysql.sql +++ b/schema.mysql.sql @@ -61,3 +61,14 @@ FOREIGN KEY `queries_skipped_query_id` (query_id) REFERENCES queries(id), UNIQUE KEY `queries_skipped_user_query` (`user_id`, `query_id`) ) CHARSET=utf8mb4; +CREATE TABLE IF NOT EXISTS `scoring_queue` ( + id INTEGER UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + user_id INTEGER UNSIGNED, + query_id INTEGER UNSIGNED NOT NULL, + priority INTEGER UNSIGNED NOT NULL, + last_assigned INTEGER UNSIGNED, + FOREIGN KEY `queries_skipped_user_id` (user_id) REFERENCES users(id), + FOREIGN KEY `queries_skipped_query_id` (query_id) REFERENCES queries(id), + KEY `last_assigned_sort_key` (`last_assigned`, `query_id`), + KEY `scoring_queue_priority` (`priority`, `query_id`) +) CHARSET=utf8mb4; diff --git a/src/RelevanceScoring/Console/UpdateScoringQueue.php b/src/RelevanceScoring/Console/UpdateScoringQueue.php new file mode 100644 index 0000000..2e60e09 --- /dev/null +++ b/src/RelevanceScoring/Console/UpdateScoringQueue.php @@ -0,0 +1,122 @@ +<?php + +namespace WikiMedia\RelevanceScoring\Console; + +use Knp\Command\Command; +use Symfony\Component\Console\Input\InputArgument; +use Symfony\Component\Console\Input\InputInterface; +use Symfony\Component\Console\Input\InputOption; +use Symfony\Component\Console\Output\OutputInterface; +use WikiMedia\RelevanceScoring\Repository\QueriesRepository; +use WikiMedia\RelevanceScoring\Repository\ScoresRepository; +use WikiMedia\RelevanceScoring\Repository\ScoringQueueRepository; + +class UpdateScoringQueue extends Command +{ + /** @var QueriesRepository */ + private $queriesRepo; + /** @var ScoringQueueRepository */ + private $scoringQueueRepo; + /** @var ScoresRepository */ + private $scoresRepo; + + public function __construct( + QueriesRepository $queriesRepo, + ScoringQueueRepository $scoringQueueRepo, + ScoresRepository $scoresRepo + ) { + parent::__construct('update-scoring-queue'); + $this->queriesRepo = $queriesRepo; + $this->scoringQueueRepo = $scoringQueueRepo; + $this->scoresRepo = $scoresRepo; + } + + protected function configure() + { + $this->setDescription('Manage the scoring queue'); + $this->addOption( + 'num', + null, + InputOption::VALUE_OPTIONAL, + 'Total number of scores desired' + ); + $this->addArgument( + 'wiki', + InputArgument::OPTIONAL, + 'Only update the specified wiki' + ); + $this->addArgument( + 'query', + InputArgument::OPTIONAL, + 'Only update the specified query. Wiki must be provided.' + ); + } + + protected function execute(InputInterface $input, OutputInterface $output) + { + $num = $input->getOption('num') ?: $this->scoringQueueRepo->getDefaultNumSlots(); + $wiki = $input->getArgument('wiki'); + $query = $input->getArgument('query'); + + $queryIds = $this->getQueryIds($output, $wiki, $query); + if ($queryIds === null) { + return 1; + } + + $count = 0; + $scores = $this->scoresRepo->getNumberOfScores($queryIds); + $pending = $this->scoringQueueRepo->getNumberPending($queryIds); + foreach ($queryIds as $queryId) { + $needed = $num; + if (isset($scores[$queryId])) { + $needed -= $scores[$queryId]; + } + if (isset($pending[$queryId])) { + $needed -= $pending[$queryId]; + } + if ($needed > 0) { + $count += $this->scoringQueueRepo->insert($queryId, $needed); + } + } + + $output->writeln("Added $count items to queue for ".count($queryIds).' queries'); + + return 0; + } + + private function getQueryIds(OutputInterface $output, $wiki, $query) + { + if ($query !== null) { + if ($wiki === null) { + $output->writeln('wiki is required.'); + + return; + } + + $maybeQueryId = $this->queriesRepo->findQueryId($wiki, $query); + if ($maybeQueryId->isEmpty()) { + $output->writeln('Unknown query'); + + return; + } + $maybeQuery = $this->queriesRepo->getQuery($maybeQueryId->get()); + if ($maybeQuery->isEmpty()) { + $output->writeln('Found query id but no query?!?'); + + return; + } + $query = $maybeQuery->get(); + if (!$query['imported']) { + $output->writeln('Query has not been imported yet'); + + return; + } + + return [$maybeQueryId->get()]; + } elseif ($wiki !== null) { + return $this->queriesRepo->findImportedQueryIdsForWiki($wiki); + } else { + return $this->queriesRepo->findAllImportedQueryIds(); + } + } +} diff --git a/src/RelevanceScoring/Controller/QueriesController.php b/src/RelevanceScoring/Controller/QueriesController.php index e0b2969..fc37cc0 100644 --- a/src/RelevanceScoring/Controller/QueriesController.php +++ b/src/RelevanceScoring/Controller/QueriesController.php @@ -11,6 +11,7 @@ use WikiMedia\RelevanceScoring\Repository\QueriesRepository; use WikiMedia\RelevanceScoring\Repository\ResultsRepository; use WikiMedia\RelevanceScoring\Repository\ScoresRepository; +use WikiMedia\RelevanceScoring\Repository\ScoringQueueRepository; class QueriesController { @@ -39,6 +40,7 @@ QueriesRepository $queriesRepo, ResultsRepository $resultsRepo, ScoresRepository $scoresRepo, + ScoringQueueRepository $scoringQueueRepo, array $wikis ) { $this->app = $app; @@ -48,6 +50,7 @@ $this->queriesRepo = $queriesRepo; $this->resultsRepo = $resultsRepo; $this->scoresRepo = $scoresRepo; + $this->scoringQueueRepo = $scoringQueueRepo; $this->wikis = $wikis; } @@ -56,9 +59,9 @@ return $this->twig->render('instructions.twig'); } - public function randomQuery(Request $request, $wiki = null) + public function nextQuery(Request $request) { - $maybeId = $this->queriesRepo->getRandomUngradedQuery($this->user, $wiki); + $maybeId = $this->scoringQueueRepo->pop($this->user); $params = []; if ($request->query->get('saved')) { $params['saved'] = 1; @@ -88,9 +91,10 @@ // tell users about this. if ($form->isValid()) { $this->queriesRepo->markQuerySkipped($this->user, $id); + $this->scoringQueueRepo->unassignUser($this->user); } - return $this->app->redirect($this->app->path('random_query')); + return $this->app->redirect($this->app->path('next_query')); } public function queryById(Request $request, $id) @@ -119,8 +123,9 @@ if ($form->isValid()) { $this->scoresRepo->storeQueryScores($this->user, $id, $form->getData()); + $this->scoringQueueRepo->markScored($this->user, $id); - return $this->app->redirect($this->app->path('random_query', ['saved' => 1])); + return $this->app->redirect($this->app->path('next_query', ['saved' => 1])); } return $this->twig->render('score_query.twig', [ diff --git a/src/RelevanceScoring/Import/Importer.php b/src/RelevanceScoring/Import/Importer.php index af17fa5..3b1ba79 100644 --- a/src/RelevanceScoring/Import/Importer.php +++ b/src/RelevanceScoring/Import/Importer.php @@ -7,13 +7,16 @@ use WikiMedia\RelevanceScoring\Exception\RuntimeException; use WikiMedia\RelevanceScoring\Repository\QueriesRepository; use WikiMedia\RelevanceScoring\Repository\ResultsRepository; +use WikiMedia\RelevanceScoring\Repository\ScoringQueueRepository; class Importer { private $db; private $queriesRepo; private $resultsRepo; + private $scoringQueueRepo; private $wikis; + private $getters; private $resultsPerSource = 25; /** @@ -25,12 +28,14 @@ Connection $db, QueriesRepository $queriesRepo, ResultsRepository $resultsRepo, + ScoringQueueRepository $scoringQueueRepo, array $wikis, array $getters ) { $this->db = $db; $this->queriesRepo = $queriesRepo; $this->resultsRepo = $resultsRepo; + $this->scoringQueueRepo = $scoringQueueRepo; $this->wikis = $wikis; $this->getters = $getters; $this->output = function () {}; @@ -62,6 +67,7 @@ $this->db->transactional(function () use ($user, $wiki, $query, $results) { $queryId = $this->queriesRepo->createQuery($user, $wiki, $query, 'imported'); $this->resultsRepo->storeResults($user, $queryId, $results); + $this->scoringQueueRepo->insert($queryId); }); return count($results); @@ -77,6 +83,7 @@ $this->db->transactional(function () use ($query, $results) { $this->resultsRepo->storeResults($query['user_id'], $query['id'], $results); $this->queriesRepo->markQueryImported($query['id']); + $this->scoringQueueRepo->insert($query['id']); }); $imported += count($results); } diff --git a/src/RelevanceScoring/RelevanceScoringProvider.php b/src/RelevanceScoring/RelevanceScoringProvider.php index 7aa738e..06c36a9 100644 --- a/src/RelevanceScoring/RelevanceScoringProvider.php +++ b/src/RelevanceScoring/RelevanceScoringProvider.php @@ -9,6 +9,7 @@ use WikiMedia\RelevanceScoring\Console\Import; use WikiMedia\RelevanceScoring\Console\ImportPending; use WikiMedia\RelevanceScoring\Console\PurgeQuery; +use WikiMedia\RelevanceScoring\Console\UpdateScoringQueue; use WikiMedia\RelevanceScoring\Controller\ImportController; use WikiMedia\RelevanceScoring\Controller\QueriesController; use WikiMedia\RelevanceScoring\Controller\ScoresController; @@ -40,10 +41,8 @@ $controllers->get('/instructions', 'search.controller.queries:instructions') ->bind('instructions'); - $controllers->get('/query', 'search.controller.queries:randomQuery') - ->bind('random_query'); - $controllers->get('/query/wiki/{wiki}', 'search.controller.queries:randomQuery') - ->bind('random_query_by_wiki'); + $controllers->get('/query', 'search.controller.queries:nextQuery') + ->bind('next_query'); $controllers->match('/query/id/{id}', 'search.controller.queries:queryById') ->bind('query_by_id'); $controllers->post('/query/skip/{id}', 'search.controller.queries:skipQueryById') @@ -83,6 +82,13 @@ $app['search.repository.scores'] = function () use ($app) { return new Repository\ScoresRepository($app['db']); }; + $app['search.repository.scoring_queue'] = function () use ($app) { + return new Repository\ScoringQueueRepository( + $app['db'], + new Util\Calendar(), + $app['search.scores_per_query'] + ); + }; $app['search.repository.users'] = function () use ($app) { return new Repository\UsersRepository($app['db']); }; @@ -105,7 +111,7 @@ // standard caption '.b_caption p', // tabbed article summary - '#tab_1 span' + '#tab_1 span', ], ], '<strong>', @@ -169,6 +175,7 @@ $app['db'], $app['search.repository.queries'], $app['search.repository.results'], + $app['search.repository.scoring_queue'], $app['search.wikis'], [ 'bing' => $app['search.importer.bing'], @@ -208,12 +215,20 @@ $app['search.repository.scores'] ); }; + $app['search.console.updateScoringQueue'] = function () use ($app) { + return new UpdateScoringQueue( + $app['search.repository.queries'], + $app['search.repository.scoring_queue'], + $app['search.repository.scores'] + ); + }; $app['search.console'] = [ 'search.console.cache-clear', 'search.console.import', 'search.console.importPending', 'search.console.purgeQuery', + 'search.console.updateScoringQueue', ]; } @@ -228,6 +243,7 @@ $app['search.repository.queries'], $app['search.repository.results'], $app['search.repository.scores'], + $app['search.repository.scoring_queue'], $app['search.wikis'] ); }; diff --git a/src/RelevanceScoring/Repository/QueriesRepository.php b/src/RelevanceScoring/Repository/QueriesRepository.php index bd2e88d..6cd00ba 100644 --- a/src/RelevanceScoring/Repository/QueriesRepository.php +++ b/src/RelevanceScoring/Repository/QueriesRepository.php @@ -120,6 +120,57 @@ } /** + * @param string $wiki + * + * @return int[] + */ + public function findQueryIdsForWiki($wiki) + { + return $this->db->project( + 'SELECT id FROM queries WHERE wiki = ?', + [$wiki], + function ($row) { return $row['id']; } + ); + } + + /** + * @param string $wiki + * + * @return int[] + */ + public function findImportedQueryIdsForWiki($wiki) + { + return $this->db->project( + 'SELECT id FROM queries WHERE wiki = ? AND imported = 1', + [$wiki], + function ($row) { return $row['id']; } + ); + } + /** + * @return int[] + */ + public function findAllQueryIds() + { + return $this->db->project( + 'SELECT id FROM queries', + [], + function ($row) { return $row['id']; } + ); + } + + /** + * @return int[] + */ + public function findAllImportedQueryIds() + { + return $this->db->project( + 'SELECT id FROM queries WHERE imported = 1', + [], + function ($row) { return $row['id']; } + ); + } + + /** * @param int $id * * @return Option<array> diff --git a/src/RelevanceScoring/Repository/ScoresRepository.php b/src/RelevanceScoring/Repository/ScoresRepository.php index 1a1f6bd..7440f2a 100644 --- a/src/RelevanceScoring/Repository/ScoresRepository.php +++ b/src/RelevanceScoring/Repository/ScoresRepository.php @@ -23,6 +23,8 @@ * @param int|null $score * * @return int + * + * @throws RuntimeException */ public function storeQueryScore(User $user, $queryId, $resultId, $score) { @@ -35,12 +37,45 @@ ]; $affected = $this->db->insert('scores', $row); if ($affected !== 1) { - throw new \RuntimeException('Failed inserting row'); + throw new RuntimeException('Failed inserting row'); } return $this->db->lastInsertId(); } + public function getNumberOfScores(array $queryIds) + { + $sql = <<<EOD +SELECT query_id, count(distinct user_id) as count + FROM scores s + WHERE query_id IN (:ids) + GROUP BY query_id +EOD; + + $res = $this->db->fetchAll( + $sql, + ['ids' => $queryIds], + ['ids' => Connection::PARAM_INT_ARRAY] + ); + if ($res === false) { + throw new RuntimeException('Query Failure'); + } + + $byQueryId = []; + foreach ($res as $row) { + $byQueryId[$row['query_id']] = $row['count']; + } + + return $byQueryId; + } + + /** + * @param User $user + * @param int $startingAtId + * @param int $limit + * + * @return array + */ public function getScoredQueries(User $user, $startingAtId = 0, $limit = 20) { @@ -62,6 +97,13 @@ return $qb->execute()->fetchAll() ?: []; } + /** + * @param int $queryId + * + * @return array + * + * @throws RuntimeException + */ public function getScoresForQuery($queryId) { $sql = <<<EOD @@ -85,6 +127,11 @@ return $res; } + /** + * @param User $user + * @param int $queryId + * @param array $scores + */ public function storeQueryScores(User $user, $queryId, array $scores) { $this->db->transactional(function () use ($user, $queryId, $scores) { @@ -94,6 +141,9 @@ }); } + /** + * @return array + */ public function getAll() { $sql = <<<EOD @@ -118,7 +168,7 @@ * * @return array * - * @throws \Exception + * @throws RuntimeException * * @todo make not suck */ diff --git a/src/RelevanceScoring/Repository/ScoringQueueRepository.php b/src/RelevanceScoring/Repository/ScoringQueueRepository.php new file mode 100644 index 0000000..ac54a0b --- /dev/null +++ b/src/RelevanceScoring/Repository/ScoringQueueRepository.php @@ -0,0 +1,266 @@ +<?php + +namespace WikiMedia\RelevanceScoring\Repository; + +use Doctrine\DBAL\Connection; +use PlasmaConduit\option\None; +use PlasmaConduit\option\Option; +use PlasmaConduit\option\Some; +use Psr\Log\LoggerAwareTrait; +use Psr\Log\NullLogger; +use WikiMedia\OAuth\User; +use WikiMedia\RelevanceScoring\Util\Calendar; + +/** + * Maintains a queue of queries that need to be scored. Is somewhat + * lazy and allows individual requests to be fulfilled by multiple + * users, but only if all queries are already assigned. + * + * Assigns priority to items in the queue, with 0 being the highest + * priority. + */ +class ScoringQueueRepository +{ + use LoggerAwareTrait; + + /** @var Connection */ + private $db; + /** @var Calendar */ + private $cal; + /** @var int */ + private $defaultNumSlots; + + /** + * @var Connection + * @var Calendar $cal + * @var int $numSlots The default number of scoring slots to create + * for a query + */ + public function __construct(Connection $db, Calendar $cal, $numSlots) + { + $this->db = $db; + $this->cal = $cal; + $this->defaultNumSlots = $numSlots; + $this->logger = new NullLogger(); + } + + /** + * @return int + */ + public function getDefaultNumSlots() + { + return $this->defaultNumSlots; + } + + /** + * @param int[] $queryIds + * + * @return int[] Map from query id to number of pending scores + */ + public function getNumberPending(array $queryIds) + { + $sql = <<<EOD +SELECT query_id, COUNT(1) AS count + FROM scoring_queue + WHERE query_id IN (?) + GROUP BY query_id +EOD; + $res = $this->db->fetchAll( + $sql, + [$queryIds], + [Connection::PARAM_INT_ARRAY] + ); + if (!$res) { + throw new RuntimeException('Failed querying database'); + } + $byQueryId = []; + foreach ($res as $row) { + $byQueryId[$row['query_id']] = $row['count']; + } + + return $byQueryId; + } + + /** + * Mark a queryId as needing to be scored $numSlots times. + * + * @param int $queryId + * @param int $numSlots + */ + public function insert($queryId, $numSlots = null) + { + if ($numSlots === null) { + $numSlots = $this->defaultNumSlots; + } + + $params = ['queryId' => $queryId]; + $rows = []; + // very simple priority assignment from 1 to $numSlots. Note + // that 0 is the highest priority. + for (;$numSlots > 0; --$numSlots) { + $rows[] = "(:queryId, :priority$numSlots)"; + $params["priority$numSlots"] = $numSlots; + } + + $sql = 'INSERT INTO scoring_queue (query_id, priority) VALUES '. + implode(',', $rows); + + return $this->db->executeUpdate($sql, $params); + } + + /** + * Assign a query to a particular user. + * + * @param User $user + * + * @return Option<int> query id + */ + public function pop(User $user) + { + $this->db->transactional(function () use ($user, &$res) { + // A user should only be assigned to one query at a time. + // Since we arn't particularly strict this is more a convenience + // against assigning a user to the same query twice. + $this->unassignUser($user); + + $rows = $this->popNull($user); + if (!$rows) { + $rows = $this->popLastAssigned($user); + } + if ($rows) { + $row = reset($rows); + $this->assign($row['id'], $user); + $res = new Some($row['query_id']); + } else { + $res = new None(); + } + }); + + return $res; + } + + /** + * Mark a query as scored by a specific user. + * + * @param User $user + * @param int $queryId + */ + public function markScored($user, $queryId) + { + $conditions = ['query_id' => $queryId, 'user_id' => $user->uid]; + $rowsDeleted = $this->db->delete('scoring_queue', $conditions); + + if ($rowsDeleted > 1) { + $this->logger->warn( + 'Unexpectedly deleted {numRows} for query {query_id} and user {user_id}', + $conditions + ['numRows' => $rowsDeleted] + ); + } elseif ($rowsDeleted === 0) { + // The query might have been unassigned. try deleting an + // unassigned entry for the same query. Doesn't matter if + // this succedes, extra ratings are allowed. + $sql = <<<EOD +DELETE FROM scoring_queue +WHERE user_id IS NULL and query_id = ? +LIMIT 1 +EOD; + $this->db->executeUpdate($sql, [$queryId]); + } + } + + /** + * Unassign queries that have been assigned longer + * than $ageInSeconds. This does not prevent the + * assigned user from grading the query, it only + * means it will potentially be assigned to a new user. + * + * @param int $ageInSeconds + * + * @return int Number of rows unassigned + */ + public function unassignOld($ageInSeconds) + { + $sql = <<<EOD +UPDATE scoring_queue + SET user_id = NULL, last_assigned = NULL + WHERE last_assigned < ? +EOD; + + return $this->db->executeUpdate($sql, [$this->cal->now() - $ageInSeconds]); + } + + /** + * Unassigned the user from all queries. + * + * @param User $user + */ + public function unassignUser(User $user) + { + $this->db->update( + 'scoring_queue', + ['user_id' => null, 'last_assigned' => null], + ['user_id' => $user->uid] + ); + } + + private function assign($queueId, User $user) + { + $rowsUpdated = $this->db->update( + 'scoring_queue', + [ + 'user_id' => $user->uid, + 'last_assigned' => $this->cal->now(), + ], + ['id' => $queueId] + ); + + return $rowsUpdated > 0; + } + + /** + * Fetch a row from the queue with the + * lowest priority. + * + * @param User $user + * + * @return array + */ + private function popNull(User $user) + { + $sql = <<<EOD +SELECT s_q.id, s_q.query_id + FROM scoring_queue s_q + LEFT OUTER JOIN queries_skipped q_s + ON q_s.user_id = ? AND q_s.query_id = s_q.query_id + WHERE s_q.user_id IS NULL + AND q_s.id IS NULL + ORDER BY s_q.priority ASC + LIMIT 1 +EOD; + + return $this->db->fetchAll($sql, [$user->uid]); + } + + /** + * Get an item from the queue based on oldest assignment + * date. Doesn't take queue priority into account. + * + * @param User $user + * + * @return array + */ + private function popLastAssigned(User $user) + { + $sql = <<<EOD +SELECT s_q.id, s_q.query_id + FROM scoring_queue s_q + LEFT OUTER JOIN queries_skipped q_s + ON q_s.user_id = ? AND q_s.query_id = s_q.query_id + WHERE q_s.id IS NULL + ORDER BY s_q.last_assigned ASC + LIMIT 1 +EOD; + + return $this->db->fetchAll($sql, [$user->uid]); + } +} diff --git a/src/RelevanceScoring/Util/Calendar.php b/src/RelevanceScoring/Util/Calendar.php new file mode 100644 index 0000000..7c11aa4 --- /dev/null +++ b/src/RelevanceScoring/Util/Calendar.php @@ -0,0 +1,14 @@ +<?php + +namespace WikiMedia\RelevanceScoring\Util; + +/** + * Very simple class allowing time mocking. + */ +class Calendar +{ + public function now() + { + return time(); + } +} diff --git a/tests/unit/RelevanceScoring/Import/HtmlResultGetterTest.php b/tests/unit/RelevanceScoring/Import/HtmlResultGetterTest.php index 0411ff6..1ee0b8e 100644 --- a/tests/unit/RelevanceScoring/Import/HtmlResultGetterTest.php +++ b/tests/unit/RelevanceScoring/Import/HtmlResultGetterTest.php @@ -113,22 +113,22 @@ $this->assertEquals($expected, $getter->handleResponse($response, 'testwiki', '')); } - /** - * Bing can return content a few different ways. In this test bing has - * provided a tabbed response for the first result containing different - * section from the article. - */ - public function testBing() - { - $response = new \GuzzleHttp\Psr7\Response( - 200, - ['Content-Type' => 'text/html; charset=UTF-8'], - file_get_contents( __DIR__ . '/../../../fixtures/bing_001.html' ) - ); - - $app = include __DIR__ . '/../../../../app.php'; - $bing = $app['search.importer.bing']; - - $bing->handleResponse($response, 'enwiki', ''); - } + /** + * Bing can return content a few different ways. In this test bing has + * provided a tabbed response for the first result containing different + * section from the article. + */ + public function testBing() + { + $response = new \GuzzleHttp\Psr7\Response( + 200, + ['Content-Type' => 'text/html; charset=UTF-8'], + file_get_contents(__DIR__.'/../../../fixtures/bing_001.html') + ); + + $app = include __DIR__.'/../../../../app.php'; + $bing = $app['search.importer.bing']; + + $bing->handleResponse($response, 'enwiki', ''); + } } diff --git a/tests/unit/RelevanceScoring/Repository/BaseRepositoryTest.php b/tests/unit/RelevanceScoring/Repository/BaseRepositoryTest.php index 272ca19..42145f2 100644 --- a/tests/unit/RelevanceScoring/Repository/BaseRepositoryTest.php +++ b/tests/unit/RelevanceScoring/Repository/BaseRepositoryTest.php @@ -22,7 +22,10 @@ $dbOptions['dbname'] = 'relevance_test'; $app['db.options'] = $dbOptions; - $tables = ['users', 'queries', 'results', 'results_sources', 'scores']; + $tables = [ + 'users', 'queries', 'results', 'scores', + 'results_sources', 'scoring_queue', + ]; $this->db = $app['db']; $this->db->exec('SET FOREIGN_KEY_CHECKS = 0'); foreach ($tables as $table) { @@ -39,7 +42,7 @@ protected function genTestUser($id = null) { $user = new User(); - $user->uid = $id ?: 1234; + $user->uid = $id ?: mt_rand(); $user->name = 'testUser'.$user->uid; $user->extra['editCount'] = 0; diff --git a/tests/unit/RelevanceScoring/Repository/ResultsRepositoryTest.php b/tests/unit/RelevanceScoring/Repository/ResultsRepositoryTest.php index 447ecd3..f23fe61 100644 --- a/tests/unit/RelevanceScoring/Repository/ResultsRepositoryTest.php +++ b/tests/unit/RelevanceScoring/Repository/ResultsRepositoryTest.php @@ -11,6 +11,7 @@ { $user = $this->genTestUser(); $repo = new ResultsRepository($this->db); + $queryId = null; $this->db->transactional(function () use ($repo, $user, &$queryId) { $queryId = $this->genQuery($user, 'JFK'); $repo->storeResults($user, $queryId, [ @@ -36,6 +37,7 @@ $user = $this->genTestUser(); $repo = new ResultsRepository($this->db); + $queryId = null; $this->db->transactional(function () use ($repo, $user, $title, $snippet, &$queryId) { $queryId = $this->genQuery($user, 'starksy and hutch devil'); $repo->storeResults($user, $queryId, [ diff --git a/tests/unit/RelevanceScoring/Repository/ScoresRepositoryTest.php b/tests/unit/RelevanceScoring/Repository/ScoresRepositoryTest.php index 74da42d..9972749 100644 --- a/tests/unit/RelevanceScoring/Repository/ScoresRepositoryTest.php +++ b/tests/unit/RelevanceScoring/Repository/ScoresRepositoryTest.php @@ -36,4 +36,26 @@ $this->assertArrayHasKey('score', $score); $this->assertEquals(null, $score['score']); } + + public function testGetNumberOfScores() + { + $user = $this->genTestUser(); + $queryId = $this->genQuery($user, 'something'); + $resultIds = $this->genResult($user, $queryId, [ + new ImportedResult('unittest', 'Some Title', '...', 0), + ]); + $resultId = reset($resultIds); + + $repo = new ScoresRepository($this->db); + $repo->storeQueryScore($user, $queryId, $resultId, null); + + $scoreCount = $repo->getNumberOfScores([$queryId]); + $this->assertArrayHasKey($queryId, $scoreCount); + $this->assertEquals(1, $scoreCount[$queryId]); + + $user2 = $this->genTestUser(); + $repo->storeQueryScore($user2, $queryId, $resultId, null); + $scoreCount = $repo->getNumberOfScores([$queryId]); + $this->assertEquals(2, $scoreCount[$queryId]); + } } diff --git a/tests/unit/RelevanceScoring/Repository/ScoringQueueRepositoryTest.php b/tests/unit/RelevanceScoring/Repository/ScoringQueueRepositoryTest.php new file mode 100644 index 0000000..9a57766 --- /dev/null +++ b/tests/unit/RelevanceScoring/Repository/ScoringQueueRepositoryTest.php @@ -0,0 +1,264 @@ +<?php + +namespace WikiMedia\Test\RelevanceScoring\Repository; + +use WikiMedia\RelevanceScoring\Repository\ScoringQueueRepository; + +class ScoringQueueRepositoryTest extends BaseRepositoryTest +{ + private $repo; + private $cal; + + public function setUp() + { + parent::setUp(); + // clock always returns a new higher value on each call + $this->cal = $this->getMock('WikiMedia\RelevanceScoring\Util\Calendar'); + $current = 1; + $this->cal->expects($this->any()) + ->method('now') + ->will($this->returnCallback(function () use (&$current) { + return $current++; + })); + $this->repo = new ScoringQueueRepository($this->db, $this->cal, 1); + } + + public static function invalidQueryIdProvider() + { + return [ + [null], + ['abc'], + [42], + ]; + } + /** + * @dataProvider invalidQueryIdProvider + * @expectedException Doctrine\DBAL\Exception\ForeignKeyConstraintViolationException + */ + public function testInsertOnlyAllowsValidQueryIds($id) + { + $this->repo->insert($id, 2); + } + + public function testInsertsDefaultNumberOfQueryIds() + { + $user = $this->genTestUser(); + $queryId = $this->genQuery($user, 'foo'); + + $repo = new ScoringQueueRepository($this->db, $this->cal, 4); + $this->assertEquals(4, $repo->insert($queryId)); + $this->assertEquals(4, $this->db->fetchColumn( + 'select count(1) from scoring_queue' + )); + + $repo = new ScoringQueueRepository($this->db, $this->cal, 6); + $this->assertEquals(6, $repo->insert($queryId)); + $this->assertEquals(10, $this->db->fetchColumn( + 'select count(1) from scoring_queue' + )); + } + + public function testInsertsProvidedNumberOfQueryIds() + { + $user = $this->genTestUser(); + $queryId = $this->genQuery($user, 'foo'); + + $this->assertEquals(5, $this->repo->insert($queryId, 5)); + $this->assertEquals(5, $this->db->fetchColumn( + 'select count(1) from scoring_queue' + )); + + $this->assertEquals(2, $this->repo->insert($queryId, 2)); + $this->assertEquals(7, $this->db->fetchColumn( + 'select count(1) from scoring_queue' + )); + } + + public function testBasicQueryPop() + { + $user = $this->genTestUser(); + foreach (['a', 'b', 'c', 'd', 'e', 'f', 'g'] as $query) { + $queryId = $this->genQuery($user, $query); + $this->repo->insert($queryId, 1); + $expected[] = $queryId; + } + foreach ($expected as $id) { + $assigned[] = $this->repo->pop($this->genTestUser())->getOrElse(null); + } + + sort($expected); + sort($assigned); + $this->assertEquals($expected, $assigned); + } + + public function testPopStillDistributesWhenAllAssigned() + { + $user = $this->genTestUser(); + $queryId = $this->genQuery($user, 'zzz'); + $this->repo->insert($queryId, 1); + + $this->assertEquals($queryId, $this->repo->pop($this->genTestUser())->getOrElse(null)); + $this->assertEquals($queryId, $this->repo->pop($this->genTestUser())->getOrElse(null)); + $this->assertEquals($queryId, $this->repo->pop($this->genTestUser())->getOrElse(null)); + } + + public function testUserOnlyAssignedToOneSlotAtATime() + { + $user = $this->genTestUser(); + $queryId = $this->genQuery($user, 'zzz'); + $this->repo->insert($queryId, 2); + + $this->repo->pop($user); + $this->repo->pop($user); + + $this->assertEquals(1, $this->db->fetchColumn( + 'SELECT COUNT(1) from scoring_queue WHERE user_id = ?', + [$user->uid] + )); + } + + public function testBasicPopRespectsUserSkippedQueries() + { + $user = $this->genTestUser(); + $queryId = $this->genQuery($user, 'zzz'); + $this->repo->insert($queryId, 5); + $this->db->insert('queries_skipped', [ + 'user_id' => $user->uid, + 'query_id' => $queryId, + ]); + // ensure even though there are remaining items for $queryId + // in queue, they arn't given to this user. + $this->assertTrue($this->repo->pop($user)->isEmpty()); + } + + public function testPopOnEmptyQueueRespectsUserSkippedQueries() + { + $user = $this->genTestUser(); + $queryId = $this->genQuery($user, 'zzz'); + $this->repo->insert($queryId, 1); + $this->db->insert('queries_skipped', [ + 'user_id' => $user->uid, + 'query_id' => $queryId, + ]); + // Assign only item in queue to different user + $this->assertFalse($this->repo->pop($this->genTestUser())->isEmpty()); + // ensure that isn't also assigned to user that skipped it + $this->assertTrue($this->repo->pop($user)->isEmpty()); + } + + public function testUnassignsOld() + { + $user = $this->genTestUser(); + $queryId = $this->genQuery($user, 'zzz'); + $this->repo->insert($queryId, 5); + + $this->repo->pop($this->genTestUser()); + $this->repo->pop($this->genTestUser()); + $this->repo->pop($this->genTestUser()); + + $this->assertEquals(3, $this->db->fetchColumn( + 'SELECT COUNT(1) FROM scoring_queue WHERE user_id IS NOT NULL' + )); + + // A bit of a weak test ... requires pop() to fetch calendar + // time exactly once for each pop. The three assigned queries + // should have times 1, 2, 3. The unassign call should delete + // everything older than 4 - $ageInSeconds. + $this->repo->unassignOld(2); + $this->assertEquals(2, $this->db->fetchColumn( + 'SELECT COUNT(1) FROM scoring_queue WHERE user_id IS NOT NULL' + )); + } + + public function testMarkScoredRemovesFromQueue() + { + $user = $this->genTestUser(); + $queryId = $this->genQuery($user, 'zzz'); + $this->repo->insert($queryId, 2); + + $this->repo->pop($user); + $this->repo->markScored($user, $queryId); + $this->assertEquals(1, $this->db->fetchColumn( + 'SELECT COUNT(1) FROM scoring_queue' + )); + } + + public function testMarkScoredRemovesFromQueueAfterUnassigned() + { + $user = $this->genTestUser(); + $queryId = $this->genQuery($user, 'zzz'); + $this->repo->insert($queryId, 2); + $this->repo->pop($user); + $this->repo->unassignOld(0); + $this->repo->markScored($user, $queryId); + $this->assertEquals(1, $this->db->fetchColumn( + 'SELECT COUNT(1) FROM scoring_queue' + )); + } + + public function testMarkScoredDoesNothingIfReassigned() + { + $user = $this->genTestUser(); + $queryId = $this->genQuery($user, 'zzz'); + $this->repo->insert($queryId, 1); + $this->repo->pop($this->genTestUser()); + $this->repo->markScored($user, $queryId); + $this->assertEquals(1, $this->db->fetchColumn( + 'SELECT COUNT(1) FROM scoring_queue' + )); + } + + public function testGetNumberPending() + { + $user = $this->genTestUser(); + $queryIds = [ + $this->genQuery($user, 'zzz'), + $this->genQuery($user, 'yyy'), + $this->genQuery($user, 'xxx'), + ]; + + foreach ($queryIds as $queryId) { + $this->repo->insert($queryId, 2); + } + + foreach ($queryIds as $queryId) { + $expected[$queryId] = 2; + } + $this->assertEquals($expected, $this->repo->getNumberPending($queryIds)); + } + + public function testPopsItemsInPriorityOrder() + { + $user = $this->genTestUser(); + $queryIds = [ + $this->genQuery($user, 'zzz'), + $this->genQuery($user, 'yyy'), + $this->genQuery($user, 'xxx'), + ]; + + foreach ($queryIds as $queryId) { + $this->repo->insert($queryId, 2); + } + + $order = []; + for ($i = 0; $i < 6; ++$i) { + $maybeId = $this->repo->pop($user); + if ($maybeId->isEmpty()) { + throw new \RuntimeException('No item retrieved'); + } + $order[] = $maybeId->get(); + $this->repo->markScored($user, $maybeId->get()); + } + + // each item should be represented in the first 3 and the last 3 + $first = array_slice($order, 0, 3); + $second = array_slice($order, 3); + sort($first); + sort($second); + sort($queryIds); + + $debug = json_encode($order); + $this->assertEquals($queryIds, $first, $debug); + $this->assertEquals($queryIds, $second, $debug); + } +} diff --git a/views/instructions.twig b/views/instructions.twig index 932b2e3..e7bf226 100644 --- a/views/instructions.twig +++ b/views/instructions.twig @@ -7,7 +7,7 @@ {% block content %} <div class="row"> - <a href="{{ path('random_query') }}" class="btn btn-default">Continue to scoring</a> + <a href="{{ path('next_query') }}" class="btn btn-default">Continue to scoring</a> </div> <div class="row"> @@ -64,7 +64,7 @@ </div> <div class="row"> - <a href="{{ path('random_query') }}" class="btn btn-default">Continue to scoring</a> + <a href="{{ path('next_query') }}" class="btn btn-default">Continue to scoring</a> </div> {% endblock %} diff --git a/views/layout.twig b/views/layout.twig index c2333a1..191b6fa 100644 --- a/views/layout.twig +++ b/views/layout.twig @@ -33,7 +33,7 @@ <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Signed in as {{ user.nickname }}<span class="caret"></span></a> <ul class="dropdown-menu"> - <li><a href="{{ path('random_query') }}">Score Results</a></li> + <li><a href="{{ path('next_query') }}">Score Results</a></li> <li><a href="{{ path('instructions') }}">Instructions</a></li> <li><a href="{{ path('import_query') }}">Import Query</a></li> <li><a href="{{ path('scores') }}">Scores</a></li> -- To view, visit https://gerrit.wikimedia.org/r/290378 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I84f70674100b7a1cf37c8db7051298c523241a52 Gerrit-PatchSet: 4 Gerrit-Project: wikimedia/discovery/discernatron Gerrit-Branch: master Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org> Gerrit-Reviewer: EBernhardson <ebernhard...@wikimedia.org> Gerrit-Reviewer: Springle <sprin...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits