http://www.mediawiki.org/wiki/Special:Code/MediaWiki/91252

Revision: 91252
Author:   yuvipanda
Date:     2011-07-01 07:09:42 +0000 (Fri, 01 Jul 2011)
Log Message:
-----------
Initial import of YuviPanda's GSoC work.

Extension to ease assessment parsing/building
collections for offline use.

Added Paths:
-----------
    trunk/extensions/GPoC/
    trunk/extensions/GPoC/.vimrc
    trunk/extensions/GPoC/AssessmentsExtractor.php
    trunk/extensions/GPoC/GPoC.hooks.php
    trunk/extensions/GPoC/GPoC.php
    trunk/extensions/GPoC/README
    trunk/extensions/GPoC/models/
    trunk/extensions/GPoC/models/Rating.php
    trunk/extensions/GPoC/schema/
    trunk/extensions/GPoC/schema/log.sql
    trunk/extensions/GPoC/schema/project_stats.sql
    trunk/extensions/GPoC/schema/projects.sql
    trunk/extensions/GPoC/schema/ratings.sql

Added: trunk/extensions/GPoC/.vimrc
===================================================================
--- trunk/extensions/GPoC/.vimrc                                (rev 0)
+++ trunk/extensions/GPoC/.vimrc        2011-07-01 07:09:42 UTC (rev 91252)
@@ -0,0 +1,2 @@
+set noexpandtab
+set tabstop=4

Added: trunk/extensions/GPoC/AssessmentsExtractor.php
===================================================================
--- trunk/extensions/GPoC/AssessmentsExtractor.php                              
(rev 0)
+++ trunk/extensions/GPoC/AssessmentsExtractor.php      2011-07-01 07:09:42 UTC 
(rev 91252)
@@ -0,0 +1,30 @@
+<?php
+
+/**
+ * Extracts project assessments from the rendered text of a page.
+ *
+ * An assessment is recognised as a <span> start tag that carries, in this
+ * exact order, the attributes data-project-name, data-importance and
+ * data-quality.
+ **/
+class AssessmentsExtractor
+{
+       private $mArticle;
+       private $mText;
+
+       /**
+        * @param $article mixed Article the text belongs to; stored, but not
+        *        read anywhere in this class
+        * @param $preparedText string Rendered text to scan for assessments
+        */
+       function __construct( $article, $preparedText ) {
+               $this->mText = $preparedText;
+               $this->mArticle = $article;
+       }
+
+       /**
+        * Collects every assessment span found in the text.
+        *
+        * @return array Project name => array( 'importance' => string,
+        *         'quality' => string ). If a project occurs more than once,
+        *         the last occurrence wins. Empty array when nothing matches.
+        */
+       public function extractAssessments() {
+               // [^"]* keeps each capture inside its own attribute value. A
+               // greedy .* would run on to the last matching attribute on the
+               // line and merge several spans into one bogus match.
+               $regex = '/<span data-project-name="(?P<project>[^"]*)"\s+'
+                       . 'data-importance="(?P<importance>[^"]*)"\s+'
+                       . 'data-quality="(?P<quality>[^"]*)"\s*>/';
+               $matches = array();
+               preg_match_all( $regex, $this->mText, $matches, PREG_SET_ORDER );
+
+               $assessments = array();
+               foreach ( $matches as $match ) {
+                       $assessments[$match['project']] = array(
+                               'importance' => $match['importance'],
+                               'quality' => $match['quality']
+                       );
+               }
+               return $assessments;
+       }
+}

Added: trunk/extensions/GPoC/GPoC.hooks.php
===================================================================
--- trunk/extensions/GPoC/GPoC.hooks.php                                (rev 0)
+++ trunk/extensions/GPoC/GPoC.hooks.php        2011-07-01 07:09:42 UTC (rev 
91252)
@@ -0,0 +1,61 @@
+<?php
+/**
+ *
+ * @file
+ * @ingroup Extensions
+ * @author Yuvi Panda, http://yuvi.in
+ * @copyright © 2011 Yuvaraj Pandian (yuvipa...@yuvi.in)
+ * @licence Modified BSD License
+ */
+
+if ( !defined( 'MEDIAWIKI' ) ) {
+       exit( 1 );
+}
+
+require_once "AssessmentsExtractor.php";
+require_once "models/Rating.php";
+
+/**
+ * Hook handlers of the GPoC extension.
+ */
+class GPoCHooks {
+
+       /**
+        * Stores the assessments found on a talk page as ratings of the
+        * main-namespace page that has the same title text.
+        *
+        * Projects that already have a stored rating are updated; all other
+        * projects get a new rating row.
+        *
+        * @param $title Title Title of the talk page that was saved
+        * @param $assessments array Project name => array with the keys
+        *        'importance' and 'quality'
+        * @param $timestamp mixed Currently unused: every rating is written
+        *        with timestamp 0. NOTE(review): the only caller passes its
+        *        $revision argument here; confirm which value should be
+        *        stored before wiring it through.
+        */
+       private static function updateDatabase( $title, $assessments, $timestamp ) {
+               $main_title = Title::makeTitle( NS_MAIN, $title->getText() );
+               $ratings = Rating::forTitle( $main_title );
+               foreach ( $assessments as $project => $assessment ) {
+                       // isset() avoids an undefined-index notice for projects
+                       // that have not rated this page before.
+                       if ( isset( $ratings[$project] ) ) {
+                               $ratings[$project]->update(
+                                       $assessment['importance'],
+                                       $assessment['quality'],
+                                       0
+                               );
+                       } else {
+                               $rating = new Rating(
+                                       $project,
+                                       $main_title->getNamespace(),
+                                       $main_title->getText(),
+                                       $assessment['quality'],
+                                       0,
+                                       $assessment['importance'],
+                                       0
+                               );
+                               $rating->saveAll();
+                       }
+               }
+       }
+
+       /**
+        * ArticleSaveComplete hook: re-reads the assessments of a talk page
+        * after it has been saved and stores them.
+        *
+        * Only $article, $text and $revision are used; the remaining
+        * parameters exist to match the hook signature.
+        *
+        * @return bool Always true
+        */
+       public static function ArticleSaveComplete(
+               &$article, &$user, $text, $summary, $minoredit, $watchthis,
+               $sectionanchor, &$flags, $revision, &$status, $baseRevId
+       ) {
+               $title = $article->getTitle();
+               // Skip everything that is not a talk-page save with a revision,
+               // to minimize how often the ratings have to be recomputed.
+               if ( $title->getNamespace() != NS_TALK || !$revision ) {
+                       return true;
+               }
+               $edit = $article->prepareTextForEdit( $text );
+               $preparedText = $edit->output->getText();
+               $extractor = new AssessmentsExtractor( $article, $preparedText );
+               $assessments = $extractor->extractAssessments();
+               self::updateDatabase( $title, $assessments, $revision );
+               return true;
+       }
+
+       /**
+        * LoadExtensionSchemaUpdates hook: registers the ratings and
+        * project_stats tables from the schema directory next to this file.
+        *
+        * @param $du DatabaseUpdater
+        * @return bool Always true
+        */
+       public static function SetupSchema( DatabaseUpdater $du ) {
+               $base = dirname( __FILE__ ) . '/schema';
+               $du->addExtensionTable( "ratings", "$base/ratings.sql" );
+               $du->addExtensionTable( "project_stats", "$base/project_stats.sql" );
+               return true;
+       }
+}

Added: trunk/extensions/GPoC/GPoC.php
===================================================================
--- trunk/extensions/GPoC/GPoC.php                              (rev 0)
+++ trunk/extensions/GPoC/GPoC.php      2011-07-01 07:09:42 UTC (rev 91252)
@@ -0,0 +1,27 @@
+<?php
+/**
+ * Proof of Concept for Yuvi Panda's 2011 GSoC
+ *
+ * @file
+ * @ingroup Extensions
+ * @author Yuvi Panda, http://yuvi.in
+ * @copyright © 2011 Yuvaraj Pandian (yuvipa...@yuvi.in)
+ * @licence Modified BSD License
+ */
+
+// Refuse to run outside MediaWiki: print a notice and exit with status 1.
+if( !defined( 'MEDIAWIKI' ) ) {
+       echo( "This file is an extension to the MediaWiki software and cannot 
be used standalone.\n" );
+       die( 1 );
+}
+
+// TODO: no extension credits are registered yet.
+
+// Directory of this file (with trailing slash), used to locate the classes
+$dir = dirname( __FILE__ ) . '/';
+
+// Let the autoloader find the hook handler class
+$wgAutoloadClasses['GPoCHooks'] = $dir . 'GPoC.hooks.php';
+
+// Hook registrations: re-read assessments after a page save, and register the
+// extension's schema files with the updater
+$wgHooks['ArticleSaveComplete'][] = 'GPoCHooks::ArticleSaveComplete';
+$wgHooks['LoadExtensionSchemaUpdates'][] = 'GPoCHooks::SetupSchema';
+
+// Configuration (the extension defines no settings yet)

Added: trunk/extensions/GPoC/README
===================================================================
--- trunk/extensions/GPoC/README                                (rev 0)
+++ trunk/extensions/GPoC/README        2011-07-01 07:09:42 UTC (rev 91252)
@@ -0,0 +1,3 @@
+This is the Proof of Concept for YuviPanda's GSoC 2011 Project.
+
+Throwaway code. Don't blame me if it cuts off your left foot.

Added: trunk/extensions/GPoC/models/Rating.php
===================================================================
--- trunk/extensions/GPoC/models/Rating.php                             (rev 0)
+++ trunk/extensions/GPoC/models/Rating.php     2011-07-01 07:09:42 UTC (rev 
91252)
@@ -0,0 +1,148 @@
+<?php
+
+/**
+ * One project's rating (quality and importance) of one article.
+ *
+ * Instances correspond to rows of the ratings table. Saving a rating also
+ * keeps the per-project, per-quality importance counters in the
+ * project_stats table in step.
+ **/
+class Rating {
+       public $project;
+       public $namespace;
+       public $title;
+       public $quality;
+       public $quality_timestamp;
+       public $importance;
+       public $importance_timestamp;
+
+       // Values replaced by update() that have not been applied to
+       // project_stats yet. Only meaningful while the matching *_changed
+       // flag is set; flags are used because '' and null are valid values.
+       private $old_importance;
+       private $old_quality;
+       private $importance_changed = false;
+       private $quality_changed = false;
+       // Whether a ratings row for this object already exists
+       private $inDB = false;
+
+       /**
+        * Maps an importance value to its counter column in project_stats.
+        *
+        * @param $importance string|null Importance, compared
+        *        case-insensitively
+        * @return string Column name; values outside the known list (and
+        *         null) are counted as unclassified
+        */
+       private static function getImportanceColumn( $importance ) {
+               $importanceColumnMapping = array(
+                       'top' => 'ps_top_icount',
+                       'high' => 'ps_high_icount',
+                       'mid' => 'ps_mid_icount',
+                       'low' => 'ps_low_icount',
+                       'bottom' => 'ps_bottom_icount',
+                       'no' => 'ps_no_icount',
+                       '' => 'ps_unclassified_icount'
+               );
+               $key = strtolower( (string)$importance );
+               if ( !isset( $importanceColumnMapping[$key] ) ) {
+                       return 'ps_unclassified_icount';
+               }
+               return $importanceColumnMapping[$key];
+       }
+
+       /**
+        * Creates an in-memory rating; nothing is written until saveAll().
+        *
+        * @param $project string Project name
+        * @param $namespace int Namespace of the rated article
+        * @param $title string Title text of the rated article
+        * @param $quality string Quality rating
+        * @param $quality_timestamp mixed Time the quality was assigned
+        * @param $importance string Importance rating
+        * @param $importance_timestamp mixed Time the importance was assigned
+        */
+       public function __construct(
+               $project, $namespace, $title, $quality, $quality_timestamp,
+               $importance, $importance_timestamp
+       ) {
+               $this->project = $project;
+               $this->namespace = $namespace;
+               $this->title = $title;
+               $this->quality = $quality;
+               $this->quality_timestamp = $quality_timestamp;
+               $this->importance = $importance;
+               $this->importance_timestamp = $importance_timestamp;
+       }
+
+       /**
+        * Applies new values and saves the rating.
+        *
+        * A value (and its timestamp) is only replaced when it differs from
+        * the stored one under loose comparison.
+        *
+        * @param $importance string New importance
+        * @param $quality string New quality
+        * @param $timestamp mixed Timestamp recorded for each changed value
+        */
+       public function update( $importance, $quality, $timestamp ) {
+               if( $quality != $this->quality ) {
+                       $this->old_quality = $this->quality;
+                       $this->quality_changed = true;
+                       $this->quality = $quality;
+                       $this->quality_timestamp = $timestamp;
+               }
+               if( $importance != $this->importance ) {
+                       $this->old_importance = $this->importance;
+                       $this->importance_changed = true;
+                       $this->importance = $importance;
+                       $this->importance_timestamp = $timestamp;
+               }
+               $this->saveAll();
+       }
+
+       /**
+        * Brings the project_stats counters in line with this rating.
+        *
+        * The counter for the current (quality, importance) pair is
+        * incremented. For a rating that already existed, the counter of the
+        * pair it had before is decremented.
+        *
+        * @param $is_new_rating bool True when the rating was just inserted
+        */
+       private function updateAggregateStats( $is_new_rating ) {
+               $changed = $this->importance_changed || $this->quality_changed;
+               if ( !$is_new_rating && !$changed ) {
+                       return;
+               }
+               $dbw = wfGetDB( DB_MASTER );
+               $now = $dbw->timestamp();
+
+               // Column names come from the fixed list in
+               // getImportanceColumn(); every value goes through addQuotes().
+               $column = self::getImportanceColumn( $this->importance );
+               $table = $dbw->tableName( 'project_stats' );
+               $query = "INSERT INTO $table ";
+               $query .= "(ps_project, ps_quality, ps_timestamp, $column) ";
+               $query .= 'VALUES (' . $dbw->addQuotes( $this->project ) . ', ';
+               $query .= $dbw->addQuotes( (string)$this->quality ) . ', ';
+               $query .= $dbw->addQuotes( $now ) . ', 1) ';
+               $query .= "ON DUPLICATE KEY UPDATE $column = $column + 1, ";
+               $query .= 'ps_timestamp = ' . $dbw->addQuotes( $now );
+               $dbw->query( $query, __METHOD__ );
+
+               if ( $is_new_rating ) {
+                       return;
+               }
+
+               // Remove the rating from the pair it was counted under before.
+               // A value that did not change is still the current one.
+               $old_quality = $this->quality_changed
+                       ? $this->old_quality : $this->quality;
+               $old_importance = $this->importance_changed
+                       ? $this->old_importance : $this->importance;
+               $old_column = self::getImportanceColumn( $old_importance );
+               $dbw->update(
+                       'project_stats',
+                       array(
+                               "$old_column = $old_column - 1",
+                               'ps_timestamp' => $now
+                       ),
+                       array(
+                               'ps_project' => $this->project,
+                               'ps_quality' => (string)$old_quality,
+                               // the counters are unsigned; never go below 0
+                               "$old_column > 0"
+                       ),
+                       __METHOD__
+               );
+       }
+
+       /**
+        * Writes the rating to the ratings table (update when the row is
+        * known to exist, insert otherwise) and refreshes project_stats.
+        */
+       public function saveAll() {
+               $data_array = array(
+                       'r_project' => $this->project,
+                       'r_namespace' => $this->namespace,
+                       'r_article' => $this->title,
+                       'r_quality' => $this->quality,
+                       'r_quality_timestamp' => $this->quality_timestamp,
+                       'r_importance' => $this->importance,
+                       'r_importance_timestamp' => $this->importance_timestamp
+               );
+               $dbw = wfGetDB( DB_MASTER );
+               if( $this->inDB ) {
+                       $dbw->update(
+                               "ratings",
+                               $data_array,
+                               array(
+                                       'r_namespace' => $this->namespace,
+                                       'r_article' => $this->title,
+                                       'r_project' => $this->project
+                               ),
+                               __METHOD__
+                       );
+
+                       $this->updateAggregateStats( false );
+               } else {
+                       $dbw->insert(
+                               "ratings",
+                               $data_array,
+                               __METHOD__
+                       );
+
+                       $this->updateAggregateStats( true );
+                       $this->inDB = true;
+               }
+
+               // The change is now reflected in project_stats; forget it so a
+               // later save does not apply it a second time.
+               $this->old_importance = null;
+               $this->old_quality = null;
+               $this->importance_changed = false;
+               $this->quality_changed = false;
+       }
+
+       /**
+        * Loads all ratings stored for a title.
+        *
+        * @param $title Title Title whose namespace and text are looked up
+        * @return array Project name => Rating
+        */
+       public static function forTitle( $title ) {
+               $dbr = wfGetDB( DB_SLAVE );
+               $query = $dbr->select(
+                       "ratings",
+                       array(
+                               "r_project", "r_namespace", "r_article",
+                               "r_quality", "r_quality_timestamp",
+                               "r_importance", "r_importance_timestamp"
+                       ),
+                       array(
+                               "r_namespace" => $title->getNamespace(),
+                               "r_article" => $title->getText(),
+                       ),
+                       __METHOD__
+               );
+
+               $ratings = array();
+
+               foreach( $query as $row ) {
+                       $rating = new Rating(
+                               $row->r_project, $row->r_namespace,
+                               $row->r_article, $row->r_quality,
+                               $row->r_quality_timestamp, $row->r_importance,
+                               $row->r_importance_timestamp
+                       );
+                       $rating->inDB = true;
+                       $ratings[$rating->project] = $rating;
+               }
+               return $ratings;
+       }
+}

Added: trunk/extensions/GPoC/schema/log.sql
===================================================================
--- trunk/extensions/GPoC/schema/log.sql                                (rev 0)
+++ trunk/extensions/GPoC/schema/log.sql        2011-07-01 07:09:42 UTC (rev 
91252)
@@ -0,0 +1,38 @@
+-- Log of rating changes.
+-- Replace /*_*/ with the proper prefix
+-- Replace /*$wgDBTableOptions*/ with the correct options
+
+CREATE TABLE IF NOT EXISTS /*_*/log (
+    -- project name
+    l_project             VARCHAR(63)  NOT NULL,
+
+    -- article namespace
+    l_namespace           INT UNSIGNED NOT NULL,
+
+    -- article name
+    l_article             VARCHAR(255) NOT NULL,
+
+    -- type of log entry (e.g. 'quality')
+    -- NOTE: this is ASCII because of maximum index key
+    -- length constraints interacting with utf-8 fields in
+    -- mysql. The primary key for this table is just under the limit.
+    l_action              VARCHAR(20) CHARACTER SET ascii NOT NULL,
+
+    -- timestamp when log entry was added
+    l_timestamp           BINARY(14)   NOT NULL,
+
+    -- old value (e.g. B-Class)
+    l_old                 VARCHAR(63),
+
+    -- new value (e.g. GA-Class)
+    l_new                 VARCHAR(63),
+
+    -- timestamp when page was edited
+    -- a wiki-format timestamp
+    l_revision_timestamp  BINARY(20)   NOT NULL,
+
+    PRIMARY KEY (l_project, l_namespace, l_article, l_action, l_timestamp),
+    KEY (l_article, l_namespace)
+) /*$wgDBTableOptions*/;
+
+-- NOTE(review): l_project is also the leading column of the primary key.
+CREATE INDEX /*i*/l_project ON /*_*/log (l_project);

Added: trunk/extensions/GPoC/schema/project_stats.sql
===================================================================
--- trunk/extensions/GPoC/schema/project_stats.sql                              
(rev 0)
+++ trunk/extensions/GPoC/schema/project_stats.sql      2011-07-01 07:09:42 UTC 
(rev 91252)
@@ -0,0 +1,49 @@
+-- Per-project assessment counters, one row per (project, quality).
+-- Replace /*_*/ with the proper prefix
+-- Replace /*$wgDBTableOptions*/ with the correct options
+
+CREATE TABLE IF NOT EXISTS /*_*/project_stats (
+
+       -- project name
+       ps_project              VARCHAR(63) NOT NULL,
+
+       -- last time project data was updated
+       ps_timestamp            BINARY(14) NOT NULL,
+
+       -- quality assessment. lowercase.
+       -- possible values: fa, a, ga, b, b1, b2, b3, b4, b5, b6, c, start,
+       -- stub, fl, l, unclassified
+       ps_quality              VARCHAR(63) NOT NULL,
+
+       -- how many pages are assessed in project
+       ps_count                INT UNSIGNED DEFAULT 0,
+
+       -- how many pages are assessed in project to be top importance
+       ps_top_icount           INT UNSIGNED DEFAULT 0,
+
+       -- how many pages are assessed in project to be high importance
+       ps_high_icount          INT UNSIGNED DEFAULT 0,
+
+       -- how many pages are assessed in project to be mid importance
+       ps_mid_icount           INT UNSIGNED DEFAULT 0,
+
+       -- how many pages are assessed in project to be low importance
+       ps_low_icount           INT UNSIGNED DEFAULT 0,
+
+       -- how many pages are assessed in project to be bottom importance
+       ps_bottom_icount        INT UNSIGNED DEFAULT 0,
+
+       -- how many pages are assessed in project to be of no importance
+       ps_no_icount            INT UNSIGNED DEFAULT 0,
+
+       -- how many pages are assessed in project without a classified
+       -- importance
+       ps_unclassified_icount  INT UNSIGNED DEFAULT 0,
+
+       -- how many pages have quality assessments in the project
+       ps_qcount               INT UNSIGNED DEFAULT 0,
+
+       -- how many pages have importance assessments in the project
+       ps_icount               INT UNSIGNED DEFAULT 0,
+
+       PRIMARY KEY (ps_project, ps_quality)
+) /*$wgDBTableOptions*/;
+
+-- NOTE(review): ps_project is also the leading column of the primary key.
+CREATE INDEX /*i*/ps_project ON /*_*/project_stats (ps_project);

Added: trunk/extensions/GPoC/schema/projects.sql
===================================================================
--- trunk/extensions/GPoC/schema/projects.sql                           (rev 0)
+++ trunk/extensions/GPoC/schema/projects.sql   2011-07-01 07:09:42 UTC (rev 
91252)
@@ -0,0 +1,36 @@
+-- One row per project.
+-- Replace /*_*/ with the proper prefix
+-- Replace /*$wgDBTableOptions*/ with the correct options
+
+CREATE TABLE IF NOT EXISTS /*_*/projects (
+
+    -- project name
+    p_project    VARCHAR(63) NOT NULL,
+
+    -- last time project data was updated
+    p_timestamp  BINARY(14) NOT NULL,
+
+    -- homepage on the wiki for this project
+    p_wikipage   VARCHAR(255),
+
+    -- parent project (for task forces)
+    p_parent     VARCHAR(63),
+
+    -- display name in headers
+    p_shortname  VARCHAR(255),
+
+    -- how many pages are assessed in project
+    p_count      INT UNSIGNED DEFAULT 0,
+
+    -- how many pages have quality assessments in the project
+    p_qcount     INT UNSIGNED DEFAULT 0,
+
+    -- how many pages have importance assessments in the project
+    p_icount     INT UNSIGNED DEFAULT 0,
+
+    -- the project's "scope points", used to compute selection scores
+    p_scope      INT UNSIGNED NOT NULL DEFAULT 0,
+
+    PRIMARY KEY (p_project)
+) /*$wgDBTableOptions*/;
+
+-- NOTE(review): p_project is already the primary key of this table.
+CREATE INDEX /*i*/p_project ON /*_*/projects (p_project);

Added: trunk/extensions/GPoC/schema/ratings.sql
===================================================================
--- trunk/extensions/GPoC/schema/ratings.sql                            (rev 0)
+++ trunk/extensions/GPoC/schema/ratings.sql    2011-07-01 07:09:42 UTC (rev 
91252)
@@ -0,0 +1,33 @@
+-- One row per (project, article) rating.
+-- Replace /*_*/ with the proper prefix
+-- Replace /*$wgDBTableOptions*/ with the correct options
+
+CREATE TABLE IF NOT EXISTS /*_*/ratings (
+       -- project name
+       r_project               VARCHAR(63)  NOT NULL,
+
+       -- article namespace
+       r_namespace             INT UNSIGNED NOT NULL,
+
+       -- article title
+       r_article               VARCHAR(255) NOT NULL,
+
+       -- quality rating
+       r_quality               VARCHAR(63),
+
+       -- time when quality rating was assigned
+       --   NOTE: a revid can be obtained from timestamp via API
+       --  a wiki-format timestamp
+       r_quality_timestamp     BINARY(20),
+
+       -- importance rating
+       r_importance            VARCHAR(63),
+
+       -- time when importance rating was assigned
+       -- a wiki-style timestamp
+       r_importance_timestamp  BINARY(20),
+
+       PRIMARY KEY (r_project, r_namespace, r_article)
+) /*$wgDBTableOptions*/;
+
+-- Lookup of all ratings of one article
+CREATE INDEX /*i*/r_article ON /*_*/ratings (r_namespace, r_article);
+-- NOTE(review): r_project is also the leading column of the primary key.
+CREATE INDEX /*i*/r_project ON /*_*/ratings (r_project);


_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to