http://www.mediawiki.org/wiki/Special:Code/MediaWiki/91252
Revision: 91252
Author: yuvipanda
Date: 2011-07-01 07:09:42 +0000 (Fri, 01 Jul 2011)
Log Message:
-----------
Initial import of YuviPanda's GSoC work. Extension to ease assessment parsing/building collections for offline use.

Added Paths:
-----------
    trunk/extensions/GPoC/
    trunk/extensions/GPoC/.vimrc
    trunk/extensions/GPoC/AssessmentsExtractor.php
    trunk/extensions/GPoC/GPoC.hooks.php
    trunk/extensions/GPoC/GPoC.php
    trunk/extensions/GPoC/README
    trunk/extensions/GPoC/models/
    trunk/extensions/GPoC/models/Rating.php
    trunk/extensions/GPoC/schema/
    trunk/extensions/GPoC/schema/log.sql
    trunk/extensions/GPoC/schema/project_stats.sql
    trunk/extensions/GPoC/schema/projects.sql
    trunk/extensions/GPoC/schema/ratings.sql

Added: trunk/extensions/GPoC/.vimrc
===================================================================
--- trunk/extensions/GPoC/.vimrc (rev 0)
+++ trunk/extensions/GPoC/.vimrc 2011-07-01 07:09:42 UTC (rev 91252)
@@ -0,0 +1,2 @@
+set noexpandtab
+set tabstop=4

Added: trunk/extensions/GPoC/AssessmentsExtractor.php
===================================================================
--- trunk/extensions/GPoC/AssessmentsExtractor.php (rev 0)
+++ trunk/extensions/GPoC/AssessmentsExtractor.php 2011-07-01 07:09:42 UTC (rev 91252)
@@ -0,0 +1,45 @@
+<?php
+
+/**
+ * Extracts assessments from the rendered text of a page.
+ *
+ * An assessment is recognised as a <span> tag carrying the attributes
+ * data-project-name, data-importance and data-quality, in that order.
+ **/
+class AssessmentsExtractor
+{
+	private $mArticle;
+	private $mText;
+
+	/**
+	 * @param $article mixed Article the text belongs to (only stored here)
+	 * @param $preparedText string Rendered text to scan for assessment spans
+	 */
+	function __construct( $article, $preparedText ) {
+		$this->mText = $preparedText;
+		$this->mArticle = $article;
+	}
+
+	/**
+	 * Finds every assessment span in the prepared text.
+	 *
+	 * @return array Map of project name => array( 'importance' => ..., 'quality' => ... );
+	 *               when a project occurs more than once, the last occurrence wins
+	 */
+	public function extractAssessments() {
+		// [^"]* rather than .* so that a value can never run past its closing
+		// quote; a greedy .* merges several spans on one line into one match.
+		$regex = '/<span data-project-name="(?P<project>[^"]*)" data-importance="(?P<importance>[^"]*)" data-quality="(?P<quality>[^"]*)"\s*>/';
+		$matches = array();
+		preg_match_all( $regex, $this->mText, $matches, PREG_SET_ORDER );
+
+		$assessments = array();
+		foreach ( $matches as $match ) {
+			$assessments[$match['project']] = array(
+				'importance' => $match['importance'],
+				'quality' => $match['quality']
+			);
+		}
+		return $assessments;
+	}
+}

Added: trunk/extensions/GPoC/GPoC.hooks.php
===================================================================
--- trunk/extensions/GPoC/GPoC.hooks.php (rev 0)
+++ trunk/extensions/GPoC/GPoC.hooks.php 2011-07-01 07:09:42 UTC (rev 91252)
@@ -0,0 +1,84 @@
+<?php
+/**
+ * Hook handlers for the GPoC extension.
+ *
+ * @file
+ * @ingroup Extensions
+ * @author Yuvi Panda, http://yuvi.in
+ * @copyright © 2011 Yuvaraj Pandian (yuvipa...@yuvi.in)
+ * @licence Modified BSD License
+ */
+
+if ( !defined( 'MEDIAWIKI' ) ) {
+	exit( 1 );
+}
+
+// Resolve relative to this file instead of relying on include_path
+require_once dirname( __FILE__ ) . '/AssessmentsExtractor.php';
+require_once dirname( __FILE__ ) . '/models/Rating.php';
+
+class GPoCHooks {
+
+	/**
+	 * Stores the assessments found on a talk page as ratings of the
+	 * main-namespace page that has the same title text.
+	 *
+	 * @param $title Title Title of the saved talk page
+	 * @param $assessments array As returned by AssessmentsExtractor::extractAssessments()
+	 * @param $timestamp mixed Timestamp to record for new or changed ratings
+	 */
+	private static function updateDatabase( $title, $assessments, $timestamp ) {
+		$main_title = Title::makeTitle( NS_MAIN, $title->getText() );
+		$ratings = Rating::forTitle( $main_title );
+		foreach ( $assessments as $project => $assessment ) {
+			// isset() avoids an undefined-index notice for projects without a rating yet
+			if( isset( $ratings[$project] ) ) {
+				$ratings[$project]->update( $assessment['importance'], $assessment['quality'], $timestamp );
+			} else {
+				$rating = new Rating(
+					$project,
+					$main_title->getNamespace(),
+					$main_title->getText(),
+					$assessment['quality'],
+					$timestamp,
+					$assessment['importance'],
+					$timestamp
+				);
+				$rating->saveAll();
+			}
+		}
+	}
+
+	/**
+	 * ArticleSaveComplete hook handler: when a page in NS_TALK is saved and a
+	 * revision is passed in, extracts its assessments and stores them.
+	 *
+	 * @return bool Always true
+	 */
+	public static function ArticleSaveComplete(&$article, &$user, $text, $summary, $minoredit, $watchthis, $sectionanchor, &$flags, $revision, &$status, $baseRevId) {
+		$title = $article->getTitle();
+		if( $title->getNamespace() == NS_TALK && $revision ) {
+			// All conditions to minimize the situations we've to run the job to update the data
+			$preparedText = $article->prepareTextForEdit( $text )->output->getText();
+			$extractor = new AssessmentsExtractor( $article, $preparedText );
+			$assessments = $extractor->extractAssessments();
+			// Hand over the revision's timestamp rather than the Revision object
+			self::updateDatabase( $title, $assessments, $revision->getTimestamp() );
+		}
+		return true;
+	}
+
+	/**
+	 * LoadExtensionSchemaUpdates hook handler: registers the ratings and
+	 * project_stats tables with the updater.
+	 *
+	 * @param $du DatabaseUpdater
+	 * @return bool Always true
+	 */
+	public static function SetupSchema( DatabaseUpdater $du ) {
+		$base = dirname( __FILE__ ) . '/schema';
+		$du->addExtensionTable( "ratings", "$base/ratings.sql");
+		$du->addExtensionTable( "project_stats", "$base/project_stats.sql" );
+		return true;
+	}
+}

Added: trunk/extensions/GPoC/GPoC.php
===================================================================
--- trunk/extensions/GPoC/GPoC.php (rev 0)
+++ trunk/extensions/GPoC/GPoC.php 2011-07-01 07:09:42 UTC (rev 91252)
@@ -0,0 +1,27 @@
+<?php
+/**
+ * Proof of Concept for Yuvi Panda's 2011 GSoC
+ *
+ * @file
+ * @ingroup Extensions
+ * @author Yuvi Panda, http://yuvi.in
+ * @copyright © 2011 Yuvaraj Pandian (yuvipa...@yuvi.in)
+ * @licence Modified BSD License
+ */
+
+if( !defined( 'MEDIAWIKI' ) ) {
+	echo( "This file is an extension to the MediaWiki software and cannot be used standalone.\n" );
+	die( 1 );
+}
+
+// Extension credits that will show up on Special:Version
+
+// Register the hook handler class and the hooks it serves
+$dir = dirname( __FILE__ ) . '/';
+
+$wgAutoloadClasses['GPoCHooks'] = $dir . 'GPoC.hooks.php';
+
+$wgHooks['ArticleSaveComplete'][] = 'GPoCHooks::ArticleSaveComplete';
+$wgHooks['LoadExtensionSchemaUpdates'][] = 'GPoCHooks::SetupSchema';
+
+// Configuration

Added: trunk/extensions/GPoC/README
===================================================================
--- trunk/extensions/GPoC/README (rev 0)
+++ trunk/extensions/GPoC/README 2011-07-01 07:09:42 UTC (rev 91252)
@@ -0,0 +1,3 @@
+This is the Proof of Concept for YuviPanda's GSoC 2011 Project.
+
+Throwaway code. Don't blame me if it cuts off your left foot.

Added: trunk/extensions/GPoC/models/Rating.php
===================================================================
--- trunk/extensions/GPoC/models/Rating.php (rev 0)
+++ trunk/extensions/GPoC/models/Rating.php 2011-07-01 07:09:42 UTC (rev 91252)
@@ -0,0 +1,204 @@
+<?php
+
+/**
+ * Represents an article and associated rating
+ **/
+class Rating {
+	public $project;
+	public $namespace;
+	public $title;
+	public $quality;
+	public $quality_timestamp;
+	public $importance;
+	public $importance_timestamp;
+
+	// Values replaced by update() and not yet applied to project_stats;
+	// null means "unchanged"
+	private $old_importance = null;
+	private $old_quality = null;
+	// True once a row for this rating exists in the ratings table
+	private $inDB = false;
+
+	/**
+	 * Maps an importance rating to the project_stats column counting it.
+	 *
+	 * @param $importance string Importance rating, matched case-insensitively
+	 * @return string Column name; unknown ratings are counted as unclassified
+	 */
+	private static function getImportanceColumn( $importance ) {
+		$importanceColumnMapping = array(
+			'top' => 'ps_top_icount',
+			'high' => 'ps_high_icount',
+			'mid' => 'ps_mid_icount',
+			'low' => 'ps_low_icount',
+			'bottom' => 'ps_bottom_icount',
+			'no' => 'ps_no_icount',
+			'' => 'ps_unclassified_icount'
+		);
+		$key = strtolower( (string)$importance );
+		// The result is used as an SQL identifier, so only ever return one
+		// of the fixed column names above.
+		if( !isset( $importanceColumnMapping[$key] ) ) {
+			return 'ps_unclassified_icount';
+		}
+		return $importanceColumnMapping[$key];
+	}
+
+	/**
+	 * Creates an in-memory rating; nothing is written until saveAll() is called.
+	 */
+	public function __construct( $project, $namespace, $title, $quality, $quality_timestamp, $importance, $importance_timestamp ) {
+		$this->project = $project;
+		$this->namespace = $namespace;
+		$this->title = $title;
+		$this->quality = $quality;
+		$this->quality_timestamp = $quality_timestamp;
+		$this->importance = $importance;
+		$this->importance_timestamp = $importance_timestamp;
+	}
+
+	/**
+	 * Applies a fresh assessment and saves it.
+	 *
+	 * A timestamp is only replaced when the corresponding value changed.
+	 *
+	 * @param $importance string New importance rating
+	 * @param $quality string New quality rating
+	 * @param $timestamp mixed Timestamp recorded for each value that changed
+	 */
+	public function update( $importance, $quality, $timestamp ) {
+		// Compare as strings so that NULL from the database equals '' and
+		// numeric-looking ratings are not compared as numbers.
+		if( (string)$quality !== (string)$this->quality ) {
+			$this->old_quality = (string)$this->quality;
+			$this->quality = $quality;
+			$this->quality_timestamp = $timestamp;
+		}
+		if( (string)$importance !== (string)$this->importance ) {
+			$this->old_importance = (string)$this->importance;
+			$this->importance = $importance;
+			$this->importance_timestamp = $timestamp;
+		}
+		$this->saveAll();
+	}
+
+	/**
+	 * Keeps the per-project counters in project_stats in step with this rating.
+	 *
+	 * @param $is_new_rating bool True when the rating was just inserted
+	 */
+	private function updateAggregateStats( $is_new_rating ) {
+		$quality_changed = $this->old_quality !== null;
+		$importance_changed = $this->old_importance !== null;
+		if( !$is_new_rating && !$quality_changed && !$importance_changed ) {
+			return;
+		}
+		$dbw = wfGetDB( DB_MASTER );
+		$table = $dbw->tableName( 'project_stats' );
+		$project = $dbw->addQuotes( $this->project );
+		$now = $dbw->addQuotes( $dbw->timestamp() );
+
+		// Count the rating under its current quality and importance
+		$importance_column = self::getImportanceColumn( $this->importance );
+		$query = "INSERT INTO $table (ps_project, ps_timestamp, ps_quality, $importance_column) ";
+		$query .= "VALUES ($project, $now, " . $dbw->addQuotes( (string)$this->quality ) . ", 1) ";
+		$query .= "ON DUPLICATE KEY ";
+		$query .= "UPDATE ps_timestamp = $now, $importance_column = $importance_column + 1";
+		$dbw->query( $query, __METHOD__ );
+
+		if( !$is_new_rating ) {
+			// Uncount it from the row and column it was counted in before.
+			// When only one of the two values changed, the other one still
+			// identifies the old row / column.
+			$old_quality = $quality_changed ? $this->old_quality : (string)$this->quality;
+			$old_importance = $importance_changed ? $this->old_importance : $this->importance;
+			$old_importance_column = self::getImportanceColumn( $old_importance );
+			$query = "UPDATE $table ";
+			$query .= "SET ps_timestamp = $now, $old_importance_column = $old_importance_column - 1 ";
+			$query .= "WHERE ps_project = $project AND ps_quality = " . $dbw->addQuotes( $old_quality ) . " ";
+			// The counters are unsigned; never let one drop below zero
+			$query .= "AND $old_importance_column > 0";
+			$dbw->query( $query, __METHOD__ );
+		}
+
+		// The change is accounted for; do not apply it again on the next save
+		$this->old_quality = null;
+		$this->old_importance = null;
+	}
+
+	/**
+	 * Writes the rating to the ratings table (insert or update) and then
+	 * refreshes the aggregate counters.
+	 */
+	public function saveAll() {
+		$data_array = array(
+			'r_project' => $this->project,
+			'r_namespace' => $this->namespace,
+			'r_article' => $this->title,
+			'r_quality' => $this->quality,
+			'r_quality_timestamp' => $this->quality_timestamp,
+			'r_importance' => $this->importance,
+			'r_importance_timestamp' => $this->importance_timestamp
+		);
+		$dbw = wfGetDB( DB_MASTER );
+		if( $this->inDB ) {
+			$dbw->update(
+				"ratings",
+				$data_array,
+				array(
+					'r_namespace' => $this->namespace,
+					'r_article' => $this->title,
+					'r_project' => $this->project
+				),
+				__METHOD__
+			);
+
+			$this->updateAggregateStats( false );
+		} else {
+			$dbw->insert(
+				"ratings",
+				$data_array,
+				__METHOD__
+			);
+
+			$this->updateAggregateStats( true );
+			$this->inDB = true;
+		}
+
+	}
+
+	/**
+	 * Loads all ratings stored for a title.
+	 *
+	 * @param $title Title Title whose namespace and text identify the rows
+	 * @return array Map of project name => Rating
+	 */
+	public static function forTitle( $title ) {
+		$dbr = wfGetDB( DB_SLAVE );
+		$query = $dbr->select(
+			"ratings",
+			array(
+				"r_project", "r_namespace", "r_article", "r_quality",
+				"r_quality_timestamp", "r_importance", "r_importance_timestamp"
+			),
+			array(
+				"r_namespace" => $title->getNamespace(),
+				"r_article" => $title->getText(),
+			),
+			__METHOD__
+		);
+
+		$ratings = array();
+
+		foreach( $query as $row ) {
+			$rating = new Rating(
+				$row->r_project, $row->r_namespace,
+				$row->r_article, $row->r_quality,
+				$row->r_quality_timestamp, $row->r_importance,
+				$row->r_importance_timestamp);
+			$rating->inDB = true;
+			$ratings[$rating->project] = $rating;
+		}
+		return $ratings;
+	}
+}

Added: trunk/extensions/GPoC/schema/log.sql
===================================================================
--- trunk/extensions/GPoC/schema/log.sql (rev 0)
+++ trunk/extensions/GPoC/schema/log.sql 2011-07-01 07:09:42 UTC (rev 91252)
@@ -0,0 +1,38 @@
+-- Replace /*_*/ with the proper prefix
+-- Replace /*$wgDBTableOptions*/ with the correct options
+
+CREATE TABLE IF NOT EXISTS /*_*/log (
+    l_project varchar(63) not null,
+    -- project name
+
+    l_namespace int unsigned not null,
+    -- article namespace
+
+    l_article varchar(255) not null,
+    -- article name
+
+    l_action varchar(20) character set ascii not null,
+    -- type of log entry (e.g. 'quality')
+
+    -- NOTE: this is ASCII because of maximum index key
+    -- length constraints interacting with utf-8 fields in
+    -- mysql. The primary key for this table is just under the limit.
+
+    l_timestamp binary(14) not null,
+    -- timestamp when log entry was added
+
+    l_old varchar(63),
+    -- old value (e.g. B-Class)
+
+    l_new varchar(63),
+    -- new value (e.g. GA-Class)
+
+    l_revision_timestamp binary(20) not null,
+    -- timestamp when page was edited
+    -- a wiki-format timestamp
+
+    primary key (l_project, l_namespace, l_article, l_action, l_timestamp),
+    key (l_article, l_namespace)
+) /*$wgDBTableOptions*/;
+
+CREATE INDEX /*i*/l_project ON /*_*/log (l_project);

Added: trunk/extensions/GPoC/schema/project_stats.sql
===================================================================
--- trunk/extensions/GPoC/schema/project_stats.sql (rev 0)
+++ trunk/extensions/GPoC/schema/project_stats.sql 2011-07-01 07:09:42 UTC (rev 91252)
@@ -0,0 +1,49 @@
+-- Replace /*_*/ with the proper prefix
+-- Replace /*$wgDBTableOptions*/ with the correct options
+
+CREATE TABLE IF NOT EXISTS /*_*/project_stats (
+
+    ps_project varchar(63) not null,
+    -- project name
+
+    ps_timestamp binary(14) not null,
+    -- last time project data was updated
+
+    ps_quality varchar(63) not null,
+    -- quality assessment. lowercase.
+    -- possible values: fa, a, ga, b, b1, b2, b3, b4, b5, b6, c, start, stub, fl, l, unclassified
+
+    ps_count int unsigned default 0,
+    -- how many pages are assessed in project
+
+    ps_top_icount int unsigned default 0,
+    -- how many pages are assessed in project to be top importance
+
+    ps_high_icount int unsigned default 0,
+    -- how many pages are assessed in project to be high importance
+
+    ps_mid_icount int unsigned default 0,
+    -- how many pages are assessed in project to be mid importance
+
+    ps_low_icount int unsigned default 0,
+    -- how many pages are assessed in project to be low importance
+
+    ps_bottom_icount int unsigned default 0,
+    -- how many pages are assessed in project to be bottom importance
+
+    ps_no_icount int unsigned default 0,
+    -- how many pages are assessed in project to be of no importance
+
+    ps_unclassified_icount int unsigned default 0,
+    -- how many pages are assessed in project without a classified importance
+
+    ps_qcount int unsigned default 0,
+    -- how many pages have quality assessments in the project
+
+    ps_icount int unsigned default 0,
+    -- how many pages have importance assessments in the project
+
+    primary key (ps_project, ps_quality)
+) /*$wgDBTableOptions*/;
+
+CREATE INDEX /*i*/ps_project ON /*_*/project_stats (ps_project);

Added: trunk/extensions/GPoC/schema/projects.sql
===================================================================
--- trunk/extensions/GPoC/schema/projects.sql (rev 0)
+++ trunk/extensions/GPoC/schema/projects.sql 2011-07-01 07:09:42 UTC (rev 91252)
@@ -0,0 +1,36 @@
+-- Replace /*_*/ with the proper prefix
+-- Replace /*$wgDBTableOptions*/ with the correct options
+
+CREATE TABLE IF NOT EXISTS /*_*/projects (
+
+    p_project varchar(63) not null,
+    -- project name
+
+    p_timestamp binary(14) not null,
+    -- last time project data was updated
+
+    p_wikipage varchar(255),
+    -- homepage on the wiki for this project
+
+    p_parent varchar(63),
+    -- parent project (for task forces)
+
+    p_shortname varchar(255),
+    -- display name in headers
+
+    p_count int unsigned default 0,
+    -- how many pages are assessed in project
+
+    p_qcount int unsigned default 0,
+    -- how many pages have quality assessments in the project
+
+    p_icount int unsigned default 0,
+    -- how many pages have importance assessments in the project
+
+    p_scope int unsigned not null default 0,
+    -- the project's "scope points", used to compute selection scores
+
+    primary key (p_project)
+) /*$wgDBTableOptions*/;
+
+CREATE INDEX /*i*/p_project ON /*_*/projects (p_project);

Added: trunk/extensions/GPoC/schema/ratings.sql
===================================================================
--- trunk/extensions/GPoC/schema/ratings.sql (rev 0)
+++ trunk/extensions/GPoC/schema/ratings.sql 2011-07-01 07:09:42 UTC (rev 91252)
@@ -0,0 +1,33 @@
+-- Replace /*_*/ with the proper prefix
+-- Replace /*$wgDBTableOptions*/ with the correct options
+
+CREATE TABLE IF NOT EXISTS /*_*/ratings (
+    r_project varchar(63) not null,
+    -- project name
+
+    r_namespace int unsigned not null,
+    -- article namespace
+
+    r_article varchar(255) not null,
+    -- article title
+
+    r_quality varchar(63),
+    -- quality rating
+
+    r_quality_timestamp binary(20),
+    -- time when quality rating was assigned
+    -- NOTE: a revid can be obtained from timestamp via API
+    -- a wiki-format timestamp
+
+    r_importance varchar(63),
+    -- importance rating
+
+    r_importance_timestamp binary(20),
+    -- time when importance rating was assigned
+    -- a wiki-style timestamp
+
+    primary key (r_project, r_namespace, r_article)
+) /*$wgDBTableOptions*/;
+
+CREATE INDEX /*i*/r_article ON /*_*/ratings (r_namespace, r_article);
+CREATE INDEX /*i*/r_project ON /*_*/ratings (r_project);

_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs