Physikerwelt has uploaded a new change for review. https://gerrit.wikimedia.org/r/186633
Change subject: Reformat maintenance scripts ...................................................................... Reformat maintenance scripts Change-Id: I39cc729f79b7045e712b4079d8497d065dc5fdad --- M maintenance/BatchExport.php M maintenance/BatchImport.php M maintenance/CalculateDistances.php M maintenance/CleanMathTable.php M maintenance/CreateBaseXMathTable.php M maintenance/CreateDB2MathTable.php M maintenance/CreateMWSHarvest.php M maintenance/ExtractFeatures.php M maintenance/GenerateFeatureTable.php M maintenance/GenerateWorkload.php M maintenance/IndexBase.php M maintenance/MathMLFilter.php M maintenance/UpdateMath.php M maintenance/batch.sh M maintenance/ibm_driver.sh 15 files changed, 452 insertions(+), 266 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/MathSearch refs/changes/33/186633/1 diff --git a/maintenance/BatchExport.php b/maintenance/BatchExport.php index 2305a1f..1ef82cb 100644 --- a/maintenance/BatchExport.php +++ b/maintenance/BatchExport.php @@ -21,13 +21,17 @@ require_once( __DIR__ . '/../../../maintenance/Maintenance.php' ); +/** + * Class BatchExport + */ class BatchExport extends Maintenance { /** * */ public function __construct() { parent::__construct(); - $this->mDescription = "Exports submissions to a folder. \n Each run is named after the following convention: \n \$userName-\$runName-\$runId.csv"; + $this->mDescription = + "Exports submissions to a folder. \n Each run is named after the following convention: \n \$userName-\$runName-\$runId.csv"; $this->addArg( "dir", "The output directory", true ); } @@ -36,9 +40,9 @@ */ public function execute() { $dir = $this->getArg( 0 ); - if ( ! 
is_dir($dir) ){ - $this->output("{$dir} is not a directory.\n"); - exit(1); + if ( !is_dir( $dir ) ) { + $this->output( "{$dir} is not a directory.\n" ); + exit( 1 ); } $dbr = wfGetDB( DB_SLAVE ); //runId INT PRIMARY KEY AUTO_INCREMENT NOT NULL, @@ -50,10 +54,10 @@ foreach ( $res as $row ) { $user = User::newFromId( $row->userId ); $username = $user->getName(); - $runName = preg_replace( "#/#","_", escapeSingleString( $row->runName )); + $runName = preg_replace( "#/#", "_", escapeSingleString( $row->runName ) ); $fn = "$dir/$username-$runName-{$row->runId}.csv"; - $this->output("Export to file $fn.\n"); - $fh = fopen( $fn, 'w' ); + $this->output( "Export to file $fn.\n" ); + $fh = fopen( $fn, 'w' ); fwrite( $fh, SpecialMathDownloadResult::run2CSV( $row->runId ) ); fclose( $fh ); } @@ -61,4 +65,5 @@ } $maintClass = "BatchExport"; +/** @noinspection PhpIncludeInspection */ require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/BatchImport.php b/maintenance/BatchImport.php index 63fb79f..7b641a8 100644 --- a/maintenance/BatchImport.php +++ b/maintenance/BatchImport.php @@ -21,22 +21,23 @@ require_once( __DIR__ . '/../../../maintenance/Maintenance.php' ); +/** + * Class BatchImport + */ class BatchImport extends Maintenance { private $dir; private $overwrite; /** - * @var DatabaseBase - */ - private $db; - /** * */ public function __construct() { parent::__construct(); - $this->mDescription = "Batch imports submissions from a folder. \n Processes CSV files that follow the naming convention: \n \$userName-\$runName.csv"; + $this->mDescription = + "Batch imports submissions from a folder. 
\n Processes CSV files that follow the naming convention: \n \$userName-\$runName.csv"; $this->addArg( "dir", "The directory to be read", true ); - $this->addOption( "overwrite" , "Overwrite existing runs with the same name.", false, false, "o" ); + $this->addOption( "overwrite", "Overwrite existing runs with the same name.", false, false, + "o" ); } /** @@ -45,32 +46,34 @@ public function execute() { $this->dir = $this->getArg( 0 ); $this->overwrite = $this->getOption( 'overwrite' ); - if( $this->overwrite ){ + if ( $this->overwrite ) { $this->output( "Loaded with option overwrite enabled .\n" ); } - if ( ! is_dir($this->dir) ){ - $this->output("{$this->dir} is not a directory.\n"); - exit(1); + if ( !is_dir( $this->dir ) ) { + $this->output( "{$this->dir} is not a directory.\n" ); + exit( 1 ); } - $files = new GlobIterator($this->dir."/*-*.csv"); + $files = new GlobIterator( $this->dir . "/*-*.csv" ); foreach ( $files as $file ) { $fn = $file->getFilename(); - if ( preg_match( "/(?P<user>.*?)-(?P<runName>.*?)\\.csv/", $fn,$matches) ){ + if ( preg_match( "/(?P<user>.*?)-(?P<runName>.*?)\\.csv/", $fn, $matches ) ) { $user = User::newFromName( $matches['user'] ); - if( $user->getId() > 0 ){ - $this->output("Importing filename $fn for userId {$user->getId()}.\n"); - $importer = new ImportCsv($user); - $result = $importer->execute( fopen($file,'r'), $matches['runName'], $this->overwrite ); - foreach( $importer->getWarnings() as $warning){ - $this->output("warning: $warning \n"); + if ( $user->getId() > 0 ) { + $this->output( "Importing filename $fn for userId {$user->getId()}.\n" ); + $importer = new ImportCsv( $user ); + $result = + $importer->execute( fopen( $file, 'r' ), $matches['runName'], + $this->overwrite ); + foreach ( $importer->getWarnings() as $warning ) { + $this->output( "warning: $warning \n" ); } - if ( $result !== true ){ - $this->output("$result\n"); + if ( $result !== true ) { + $this->output( "$result\n" ); } else { $this->output( "File $fn 
imported as {$importer->getRunId()} \n" ); } } else { - $this->output("User {$matches['user']} is invalid. Skipping file $fn.\n"); + $this->output( "User {$matches['user']} is invalid. Skipping file $fn.\n" ); } } } @@ -78,4 +81,5 @@ } $maintClass = "BatchImport"; +/** @noinspection PhpIncludeInspection */ require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/CalculateDistances.php b/maintenance/CalculateDistances.php index 20aa23d..2477a67 100644 --- a/maintenance/CalculateDistances.php +++ b/maintenance/CalculateDistances.php @@ -21,6 +21,9 @@ require_once( dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' ); +/** + * Class CalculateDistances + */ class CalculateDistances extends Maintenance { const RTI_CHUNK_SIZE = 100; /**@var DatabaseBase $dbw */ @@ -38,7 +41,8 @@ public function __construct() { parent::__construct(); $this->mDescription = 'Outputs page text to stdout'; - $this->addOption( 'page9', 'Ignore pages with only 9 equations or less.', false, false, "9" ); + $this->addOption( 'page9', 'Ignore pages with only 9 equations or less.', false, false, + "9" ); $this->addArg( 'min', "If set processing is started at the page with curid>min", false ); $this->addArg( 'max', "If set processing is stopped at the page with curid<=max", false ); } @@ -57,10 +61,14 @@ $conds .= " AND pagestat_pageid <= $max"; } if ( $this->getOption( 'page9', false ) ) { - $res = $this->db->select( array( 'mathpage9' , 'mathpagestat'), array( 'page_id' ,'pagestat_pageid') , - $conds . ' AND pagestat_pageid = page_id', __METHOD__, array( 'DISTINCT' ) ); + $res = + $this->db->select( array( 'mathpage9', 'mathpagestat' ), + array( 'page_id', 'pagestat_pageid' ), + $conds . 
' AND pagestat_pageid = page_id', __METHOD__, array( 'DISTINCT' ) ); } else { - $res = $this->db->select( 'mathpagestat', 'pagestat_pageid', $conds, __METHOD__, array( 'DISTINCT' ) ); + $res = + $this->db->select( 'mathpagestat', 'pagestat_pageid', $conds, __METHOD__, + array( 'DISTINCT' ) ); } foreach ( $res as $row ) { array_push( $this->pagelist, $row->pagestat_pageid ); @@ -72,22 +80,23 @@ /** * Populates the search index with content from all pages */ - protected function populateSearchIndex( ) { + protected function populateSearchIndex() { $n = 0; - $count = sizeof($this->pagelist); + $count = sizeof( $this->pagelist ); $this->output( "Rebuilding index fields for $count pages...\n" ); while ( $n < $count ) { if ( $n ) { $this->output( $n . " of $count \n" ); } - $this->dbw->begin(); - for($j=0;$j<self::RTI_CHUNK_SIZE;$j++){ + $this->dbw->begin(); + for ( $j = 0; $j < self::RTI_CHUNK_SIZE; $j ++ ) { //TODO: USE PREPARED STATEMENTS $pid = $this->pagelist[$n]; - $sql = "INSERT IGNORE INTO mathpagesimilarity(pagesimilarity_A,pagesimilarity_B,pagesimilarity_Value)\n" - . "SELECT DISTINCT $pid,`pagestat_pageid`,\n" - . "CosProd( $pid,`pagestat_pageid`) FROM `mathpagestat` m "; - if ( $this->getOption( 'page9', false ) ){ + $sql = + "INSERT IGNORE INTO mathpagesimilarity(pagesimilarity_A,pagesimilarity_B,pagesimilarity_Value)\n" . + "SELECT DISTINCT $pid,`pagestat_pageid`,\n" . + "CosProd( $pid,`pagestat_pageid`) FROM `mathpagestat` m "; + if ( $this->getOption( 'page9', false ) ) { $sql .= " JOIN (SELECT page_id from mathpage9) as r WHERE m.pagestat_pageid=r.page_id AND "; } else { $sql .= " WHERE "; @@ -97,7 +106,7 @@ $start = microtime( true ); $this->dbw->query( $sql ); echo 'done in ' . ( microtime( true ) - $start ) . 
"\n"; - $n++; + $n ++; } $start = microtime( true ); $this->dbw->commit(); @@ -107,4 +116,5 @@ } $maintClass = "CalculateDistances"; +/** @noinspection PhpIncludeInspection */ require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/CleanMathTable.php b/maintenance/CleanMathTable.php index b80f200..10e28a7 100644 --- a/maintenance/CleanMathTable.php +++ b/maintenance/CleanMathTable.php @@ -21,6 +21,9 @@ require_once( dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' ); +/** + * Class CleanMathTable + */ class CleanMathTable extends Maintenance { const RTI_CHUNK_SIZE = 10; public $purge = false; @@ -29,14 +32,18 @@ * @var DatabaseBase */ private $db; + /** * */ public function __construct() { parent::__construct(); $this->mDescription = 'Outputs page text to stdout'; - $this->addOption( 'purge', "If set all formulae are rendered again from strech. (Very time consuming!)", false, false, "f" ); + $this->addOption( 'purge', + "If set all formulae are rendered again from strech. (Very time consuming!)", false, + false, "f" ); } + /** * The idea is basically to select the math elements that do not have a corresponding mathindex entry. 
* Basically that means: @@ -52,4 +59,5 @@ } $maintClass = "CleanMathTable"; +/** @noinspection PhpIncludeInspection */ require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/CreateBaseXMathTable.php b/maintenance/CreateBaseXMathTable.php index 6e92ca0..ca13235 100644 --- a/maintenance/CreateBaseXMathTable.php +++ b/maintenance/CreateBaseXMathTable.php @@ -31,7 +31,7 @@ private static $mwsns = "mws:"; private static $XMLHead; private static $XMLFooter; - /** @var \BaseXSession */ + /** @var \BaseXSession */ private $session; /** @@ -39,28 +39,31 @@ */ public function __construct() { parent::__construct(); - $this->mDescription = 'Generates harvest files for the MathWebSearch Deamon.'; + $this->mDescription = 'Generates harvest files for the MathWebSearch Daemon.'; $this->addOption( 'mwsns', 'The namespace or mws normally "mws:"', false ); $this->addOption( 'truncate', 'If set the database will be recreated.' ); } /** - * @param unknown $row + * @param stdClass $row + * * @return string */ protected function generateIndexString( $row ) { $out = ""; - $xml = simplexml_load_string( utf8_decode($row->math_mathml) ); + $xml = simplexml_load_string( utf8_decode( $row->math_mathml ) ); if ( !$xml ) { echo "ERROR while converting:\n " . var_export( $row->math_mathml, true ) . "\n"; - foreach ( libxml_get_errors() as $error ) + foreach ( libxml_get_errors() as $error ) { echo "\t", $error->message; + } libxml_clear_errors(); return ""; } $out .= "\n<" . self::$mwsns . "expr url=\"" . - MathSearchHooks::generateMathAnchorString( $row->mathindex_revision_id, $row->mathindex_anchor, '' ) . "\">\n\t"; - $out .= utf8_decode( $row->math_mathml );// $xml->math->children()->asXML(); + MathSearchHooks::generateMathAnchorString( $row->mathindex_revision_id, + $row->mathindex_anchor, '' ) . "\">\n\t"; + $out .= utf8_decode( $row->math_mathml );// $xml->math->children()->asXML(); $out .= "\n</" . self::$mwsns . "expr>\n"; // TODO: This does not work yet. 
// Find out how to insert new data without to write it into a temporary file @@ -68,43 +71,51 @@ return $out; } - protected function getHead(){ + protected function getHead() { return self::$XMLHead; } - protected function getFooter(){ + + protected function getFooter() { return self::$XMLFooter; } + /** * @param string $fn - * @param int $min - * @param int $inc + * @param int $min + * @param int $inc + * * @return boolean */ protected function wFile( $fn, $min, $inc ) { - $retval = parent::wFile($fn,$min,$inc); - $this->session->execute("add $fn"); + $retval = parent::wFile( $fn, $min, $inc ); + $this->session->execute( "add $fn" ); return $retval; } + /** * */ public function execute() { global $wgMathSearchBaseXDatabaseName; self::$mwsns = $this->getOption( 'mwsns', '' ); - self::$XMLHead = "<?xml version=\"1.0\"?>\n<" . self::$mwsns . "harvest xmlns:mws=\"http://search.mathweb.org/ns\" xmlns:m=\"http://www.w3.org/1998/Math/MathML\">"; + self::$XMLHead = + "<?xml version=\"1.0\"?>\n<" . self::$mwsns . + "harvest xmlns:mws=\"http://search.mathweb.org/ns\" xmlns:m=\"http://www.w3.org/1998/Math/MathML\">"; self::$XMLFooter = "</" . self::$mwsns . "harvest>"; $this->session = new BaseXSession(); - if( $this->getOption('truncate',false) ){ - $this->session->execute("open ".$wgMathSearchBaseXDatabaseName); + if ( $this->getOption( 'truncate', false ) ) { + $this->session->execute( "open " . $wgMathSearchBaseXDatabaseName ); } else { - $this->session->execute("create db ".$wgMathSearchBaseXDatabaseName); + $this->session->execute( "create db " . 
$wgMathSearchBaseXDatabaseName ); } parent::execute(); } - public function __destruct(){ + + public function __destruct() { $this->session->close(); } } $maintClass = "CreateBaseXMathTable"; +/** @noinspection PhpIncludeInspection */ require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/CreateDB2MathTable.php b/maintenance/CreateDB2MathTable.php index 3b3f008..11bd92f 100644 --- a/maintenance/CreateDB2MathTable.php +++ b/maintenance/CreateDB2MathTable.php @@ -30,6 +30,9 @@ private $conn; private $time; + /** + * + */ public function __construct() { parent::__construct(); $this->mDescription = 'Exports a db2 compatible math index table.'; @@ -37,44 +40,62 @@ } /** - * @param unknown $row + * @param stdClass $row + * * @return string */ protected function generateIndexString( $row ) { - $mo = MathObject::constructformpagerow($row); - $out = '"'. $mo->getMd5().'"'; - $out .= ',"'. $mo->getTex().'"'; - $out .= ','. $row->mathindex_revision_id .''; - $out .= ','. $row->mathindex_anchor.''; - $out .= ',"'.str_replace(array('"',"\n"),array('"',' '), $mo->getMathml()).'"'; - $res = db2_execute($this->statment, array($mo->getMd5(),$mo->getTex(),$row->mathindex_revision_id,$row->mathindex_anchor,$mo->getMathml())); - if ( ! $res ){ + $mo = MathObject::constructformpagerow( $row ); + $out = '"' . $mo->getMd5() . '"'; + $out .= ',"' . $mo->getTex() . '"'; + $out .= ',' . $row->mathindex_revision_id . ''; + $out .= ',' . $row->mathindex_anchor . ''; + $out .= ',"' . str_replace( array( '"', "\n" ), array( '"', ' ' ), $mo->getMathml() ) . '"'; + $res = + db2_execute( $this->statment, array( + $mo->getMd5(), + $mo->getTex(), + $row->mathindex_revision_id, + $row->mathindex_anchor, + $mo->getMathml() + ) ); + if ( !$res ) { echo db2_stmt_errormsg(); } - return $out."\n"; + return $out . 
"\n"; } + /** + * @param string $fn + * @param int $min + * @param int $inc + * + * @return bool + */ protected function wFile( $fn, $min, $inc ) { - $res = db2_commit($this->conn); - if ( $res ){ - echo db2_stmt_errormsg(); + $res = db2_commit( $this->conn ); + if ( $res ) { + echo db2_stmt_errormsg(); + } + $delta = microtime( true ) - $this->time; + $this->time = microtime( true ); + echo 'took ' . number_format( $delta, 1 ) . "s \n"; + return parent::wFile( $fn, $min, $inc ); } - $delta = microtime(true) - $this->time ; - $this->time = microtime(true); - echo 'took '. number_format($delta ,1) ."s \n"; - return parent::wFile( $fn, $min, $inc ); -} public function execute() { global $wgMathSearchDB2ConnStr; - $this->time = microtime(true); - $this->conn = db2_connect($wgMathSearchDB2ConnStr, '', ''); - if ( $this->conn ){ - if ( $this->getOption('truncate' , false ) ){ - db2_exec( $this->conn , 'DROP TABLE "math"'); - db2_exec( $this->conn , 'CREATE TABLE "math" ("math_md5" CHAR(32), "math_tex" VARCHAR(1000), "mathindex_revision_id" INTEGER, "mathindex_anchord" INTEGER, "math_mathml" XML)'); + $this->time = microtime( true ); + $this->conn = db2_connect( $wgMathSearchDB2ConnStr, '', '' ); + if ( $this->conn ) { + if ( $this->getOption( 'truncate', false ) ) { + db2_exec( $this->conn, 'DROP TABLE "math"' ); + db2_exec( $this->conn, + 'CREATE TABLE "math" ("math_md5" CHAR(32), "math_tex" VARCHAR(1000), "mathindex_revision_id" INTEGER, "mathindex_anchord" INTEGER, "math_mathml" XML)' ); } - $this->statment = db2_prepare( $this->conn ,'insert into "math" ("math_md5", "math_tex", "mathindex_revision_id", "mathindex_anchord", "math_mathml") values(?, ?, ?, ?, ?)'); + $this->statment = + db2_prepare( $this->conn, + 'INSERT INTO "math" ("math_md5", "math_tex", "mathindex_revision_id", "mathindex_anchord", "math_mathml") VALUES(?, ?, ?, ?, ?)' ); //db2_autocommit($this->conn , DB2_AUTOCOMMIT_OFF); } parent::execute(); @@ -82,4 +103,5 @@ } $maintClass = 
"CreateDB2MathTable"; +/** @noinspection PhpIncludeInspection */ require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/CreateMWSHarvest.php b/maintenance/CreateMWSHarvest.php index 38e5ac1..ed9d262 100644 --- a/maintenance/CreateMWSHarvest.php +++ b/maintenance/CreateMWSHarvest.php @@ -37,32 +37,34 @@ */ public function __construct() { parent::__construct(); - $this->mDescription = 'Generates harvest files for the MathWebSearch Deamon.'; + $this->mDescription = 'Generates harvest files for the MathWebSearch Daemon.'; $this->addOption( 'mwsns', 'The namespace or mws normally "mws:"', false ); } /** - * @param unknown $row + * @param stdClass $row + * * @return string */ protected function generateIndexString( $row ) { $out = ""; - $xml = simplexml_load_string( utf8_decode($row->math_mathml) ); + $xml = simplexml_load_string( utf8_decode( $row->math_mathml ) ); if ( !$xml ) { echo "ERROR while converting:\n " . var_export( $row->math_mathml, true ) . "\n"; - foreach ( libxml_get_errors() as $error ) + foreach ( libxml_get_errors() as $error ) { echo "\t", $error->message; + } libxml_clear_errors(); return ""; } // if ( $xml->math ) { - // $smath = $xml->math->semantics-> { 'annotation-xml' } ->children()->asXML(); + // $smath = $xml->math->semantics-> { 'annotation-xml' } ->children()->asXML(); $out .= "\n<" . self::$mwsns . "expr url=\"" . - MathSearchHooks::generateMathAnchorString( $row->mathindex_revision_id, $row->mathindex_anchor, '' ) . - "\">\n\t"; - $out .= utf8_decode( $row->math_mathml );// $xml->math->children()->asXML(); - $out .= "\n</" . self::$mwsns . "expr>\n"; - return $out; + MathSearchHooks::generateMathAnchorString( $row->mathindex_revision_id, + $row->mathindex_anchor, '' ) . "\">\n\t"; + $out .= utf8_decode( $row->math_mathml );// $xml->math->children()->asXML(); + $out .= "\n</" . self::$mwsns . 
"expr>\n"; + return $out; /*} else { var_dump($xml); die("nomath"); @@ -70,22 +72,27 @@ } - protected function getHead(){ + protected function getHead() { return self::$XMLHead; } - protected function getFooter(){ + + protected function getFooter() { return self::$XMLFooter; } + /** * */ public function execute() { self::$mwsns = $this->getOption( 'mwsns', '' ); - self::$XMLHead = "<?xml version=\"1.0\"?>\n<" . self::$mwsns . "harvest xmlns:mws=\"http://search.mathweb.org/ns\" xmlns:m=\"http://www.w3.org/1998/Math/MathML\">"; + self::$XMLHead = + "<?xml version=\"1.0\"?>\n<" . self::$mwsns . + "harvest xmlns:mws=\"http://search.mathweb.org/ns\" xmlns:m=\"http://www.w3.org/1998/Math/MathML\">"; self::$XMLFooter = "</" . self::$mwsns . "harvest>"; parent::execute(); } } $maintClass = "CreateMWSHarvest"; +/** @noinspection PhpIncludeInspection */ require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/ExtractFeatures.php b/maintenance/ExtractFeatures.php index 838d775..17aca4c 100644 --- a/maintenance/ExtractFeatures.php +++ b/maintenance/ExtractFeatures.php @@ -21,29 +21,42 @@ require_once( dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' ); +/** + * Class ExtractFeatures + */ class ExtractFeatures extends Maintenance { const RTI_CHUNK_SIZE = 100; public $purge = false; + /** @type DatabaseMysql */ public $dbw = null; /** * @var DatabaseBase */ private $db; + /** * */ public function __construct() { parent::__construct(); $this->mDescription = 'Outputs page text to stdout'; - $this->addOption( 'purge', "If set all formulae are rendered again from strech. (Very time consuming!)", false, false, "f" ); - $this->addArg( 'min', "If set processing is started at the page with rank(pageID)>min", false ); - $this->addArg( 'max', "If set processing is stopped at the page with rank(pageID)<=max", false ); + $this->addOption( 'purge', + "If set all formulae are rendered again from strech. 
(Very time consuming!)", false, + false, "f" ); + $this->addArg( 'min', "If set processing is started at the page with rank(pageID)>min", + false ); + $this->addArg( 'max', "If set processing is stopped at the page with rank(pageID)<=max", + false ); } + /** * Populates the search index with content from all pages + * + * @param int $n + * @param int $cmax */ - protected function populateSearchIndex( $n = 0, $cmax = -1 ) { + protected function populateSearchIndex( $n = 0, $cmax = - 1 ) { $res = $this->db->select( 'page', 'MAX(page_id) AS count' ); $s = $this->db->fetchObject( $res ); $count = $s->count; @@ -59,21 +72,25 @@ } $end = $n + self::RTI_CHUNK_SIZE - 1; - $res = $this->db->select( array( 'page', 'revision', 'text' ), + $res = + $this->db->select( array( 'page', 'revision', 'text' ), array( 'page_id', 'page_namespace', 'page_title', 'old_flags', 'old_text' ), - array( "page_id BETWEEN $n AND $end", 'page_latest = rev_id', 'rev_text_id = old_id' ), - __METHOD__ - ); + array( + "page_id BETWEEN $n AND $end", + 'page_latest = rev_id', + 'rev_text_id = old_id' + ), __METHOD__ ); $this->dbw->begin(); // echo "before" +$this->dbw->selectField('mathindex', 'count(*)')."\n"; foreach ( $res as $s ) { $revtext = Revision::getRevisionText( $s ); - $fcount += self::doUpdate( $s->page_id, $revtext, $s->page_title, $this->purge, $this->dbw ); + $fcount += self::doUpdate( $s->page_id, $revtext, $s->page_title, $this->purge, + $this->dbw ); } // echo "before" +$this->dbw->selectField('mathindex', 'count(*)')."\n"; $start = microtime( true ); $this->dbw->commit(); - echo " committed in " . ( microtime( true ) -$start ) . "s\n\n"; + echo " committed in " . ( microtime( true ) - $start ) . 
"s\n\n"; // echo "after" +$this->dbw->selectField('mathindex', 'count(*)')."\n"; $n += self::RTI_CHUNK_SIZE; } @@ -81,35 +98,40 @@ $sql = "TRUNCATE TABLE `mathvarstat`"; $this->dbw->query( $sql ); $this->output( "Generate mathvarstat\n" ); - $sql = "INSERT INTO `mathvarstat` (`varstat_featurename` , `varstat_featuretype`, `varstat_featurecount`)\n" - . "SELECT `mathobservation_featurename` , `mathobservation_featuretype` , count( * ) AS CNT\n" - . "FROM `mathobservation`\n" - . "JOIN mathindex ON `mathobservation_inputhash` = mathindex_inputhash\n" - . "GROUP BY `mathobservation_featurename` , `mathobservation_featuretype`\n" - . "ORDER BY CNT DESC"; + $sql = + "INSERT INTO `mathvarstat` (`varstat_featurename` , `varstat_featuretype`, `varstat_featurecount`)\n" . + "SELECT `mathobservation_featurename` , `mathobservation_featuretype` , count( * ) AS CNT\n" . + "FROM `mathobservation`\n" . + "JOIN mathindex ON `mathobservation_inputhash` = mathindex_inputhash\n" . + "GROUP BY `mathobservation_featurename` , `mathobservation_featuretype`\n" . + "ORDER BY CNT DESC"; $this->dbw->query( $sql ); $this->output( "Clear mathpagestat\n" ); $sql = "TRUNCATE TABLE `mathpagestat`"; $this->dbw->query( $sql ); $this->output( "Generate mathpagestat\n" ); - $sql = "INSERT INTO `mathpagestat`(`pagestat_featureid`,`pagestat_pageid`,`pagestat_featurecount`)\n" - . "SELECT varstat_id, mathindex_revision_id, count(*) as CNT FROM `mathobservation` JOIN mathindex on `mathobservation_inputhash` =mathindex_inputhash\n" - . "JOIN mathvarstat on varstat_featurename = `mathobservation_featurename` and varstat_featuretype = `mathobservation_featuretype`\n" - . " GROUP by `mathobservation_featurename`, `mathobservation_featuretype`,mathindex_revision_id ORDER BY CNT DESC"; + $sql = + "INSERT INTO `mathpagestat`(`pagestat_featureid`,`pagestat_pageid`,`pagestat_featurecount`)\n" . 
+ "SELECT varstat_id, mathindex_revision_id, count(*) AS CNT FROM `mathobservation` JOIN mathindex ON `mathobservation_inputhash` =mathindex_inputhash\n" . + "JOIN mathvarstat ON varstat_featurename = `mathobservation_featurename` AND varstat_featuretype = `mathobservation_featuretype`\n" . + " GROUP BY `mathobservation_featurename`, `mathobservation_featuretype`,mathindex_revision_id ORDER BY CNT DESC"; $this->dbw->query( $sql ); $this->output( "Updated {$fcount} formulae!\n" ); } + /** - * @param unknown $pId - * @param unknown $pText - * @param string $pTitle - * @param string $purge + * @param $pid + * @param string $pText + * @param string $pTitle + * @param bool|string $purge + * @param $dbw + * * @return number + * @internal param unknown $pId */ - private static function doUpdate( $pid, $pText, $pTitle = "", $purge = false , $dbw ) { + private static function doUpdate( $pid, $pText, $pTitle = "", $purge = false, $dbw ) { // TODO: fix link id problem $anchorID = 0; - $res = ""; $math = MathObject::extractMathTagsFromWikiText( $pText ); $matches = sizeof( $math ); if ( $matches ) { @@ -118,12 +140,13 @@ $mo = new MathObject( $formula[1] ); $mo->updateObservations( $dbw ); // Enable indexing of math formula - $anchorID++; + $anchorID ++; } return $matches; } return 0; } + /** * */ @@ -132,9 +155,10 @@ $this->purge = $this->getOption( "purge", false ); $this->db = wfGetDB( DB_MASTER ); $this->output( "Done.\n" ); - $this->populateSearchIndex( $this->getArg( 0, 0 ), $this->getArg( 1, -1 ) ); + $this->populateSearchIndex( $this->getArg( 0, 0 ), $this->getArg( 1, - 1 ) ); } } $maintClass = "ExtractFeatures"; +/** @noinspection PhpIncludeInspection */ require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/GenerateFeatureTable.php b/maintenance/GenerateFeatureTable.php index 59c75ad..ff72f37 100644 --- a/maintenance/GenerateFeatureTable.php +++ b/maintenance/GenerateFeatureTable.php @@ -21,29 +21,42 @@ require_once( dirname( __FILE__ ) . 
'/../../../maintenance/Maintenance.php' ); -class UpdateMath extends Maintenance { +/** + * Class GenerateFeatureTable + */ +class GenerateFeatureTable extends Maintenance { const RTI_CHUNK_SIZE = 100000; public $purge = false; + /** @type DatabaseMysql */ public $dbw = null; /** * @var DatabaseBase */ private $db; + /** * */ public function __construct() { parent::__construct(); $this->mDescription = 'Outputs page text to stdout'; - $this->addOption( 'purge', "If set all formulae are rendered again from strech. (Very time consuming!)", false, false, "f" ); - $this->addArg( 'min', "If set processing is started at the page with rank(pageID)>min", false ); - $this->addArg( 'max', "If set processing is stopped at the page with rank(pageID)<=max", false ); + $this->addOption( 'purge', + "If set all formulae are rendered again from strech. (Very time consuming!)", false, + false, "f" ); + $this->addArg( 'min', "If set processing is started at the page with rank(pageID)>min", + false ); + $this->addArg( 'max', "If set processing is stopped at the page with rank(pageID)<=max", + false ); } + /** * Populates the search index with content from all pages + * + * @param int $n + * @param int $cmax */ - protected function populateSearchIndex( $n = 0, $cmax = -1 ) { + protected function populateSearchIndex( $n = 0, $cmax = - 1 ) { $res = $this->db->select( 'page', 'MAX(page_id) AS count' ); $s = $this->db->fetchObject( $res ); $count = $s->count; @@ -59,11 +72,12 @@ } $end = $n + self::RTI_CHUNK_SIZE - 1; - $res = $this->db->select( array( 'page', 'revision', 'text' ), - array( 'page_id' ), - array( "page_id BETWEEN $n AND $end", 'page_latest = rev_id', 'rev_text_id = old_id' ), - __METHOD__ - ); + $res = + $this->db->select( array( 'page', 'revision', 'text' ), array( 'page_id' ), array( + "page_id BETWEEN $n AND $end", + 'page_latest = rev_id', + 'rev_text_id = old_id' + ), __METHOD__ ); $this->dbw->begin(); // echo "before" +$this->dbw->selectField('mathindex', 'count(*)')."\n"; 
foreach ( $res as $s ) { @@ -74,28 +88,40 @@ } // $this->output( "Updated {$fcount} formulae!\n" ); } + /** - * @param unknown $pId - * @param unknown $pText - * @param string $pTitle - * @param string $purge + * @param $pid + * * @return number + * @internal param unknown $pId + * @internal param unknown $pText + * @internal param string $pTitle + * @internal param string $purge */ private function doUpdate( $pid ) { // TODO: fix link id problem - $anchorID = 0; - $res = $this->db->select( array( 'mathpagestat', 'mathvarstat' ), - array( 'pagestat_pageid', 'pagestat_featurename', 'pagestat_featuretype', 'pagestat_featurecount', 'varstat_id', 'varstat_featurecount' ), - array( 'pagestat_pageid' => $pid, 'pagestat_featurename = varstat_featurename', 'pagestat_featuretype=varstat_featuretype' ), - __METHOD__ - ); + $res = + $this->db->select( array( 'mathpagestat', 'mathvarstat' ), array( + 'pagestat_pageid', + 'pagestat_featurename', + 'pagestat_featuretype', + 'pagestat_featurecount', + 'varstat_id', + 'varstat_featurecount' + ), array( + 'pagestat_pageid' => $pid, + 'pagestat_featurename = varstat_featurename', + 'pagestat_featuretype=varstat_featuretype' + ), __METHOD__ ); foreach ( $res as $row ) { $this->output( $pid . ',' . $row->varstat_id . ',' . $row->pagestat_featurecount - /// $row->varstat_featurecount - . "\n" );// .';'.$row->pagestat_featuretype.utf8_decode($row->pagestat_featurename)."\n"); + /// $row->varstat_featurecount + . 
+ "\n" );// .';'.$row->pagestat_featuretype.utf8_decode($row->pagestat_featurename)."\n"); } return 0; } + /** * */ @@ -103,9 +129,10 @@ $this->dbw = wfGetDB( DB_MASTER ); $this->purge = $this->getOption( "purge", false ); $this->db = wfGetDB( DB_MASTER ); - $this->populateSearchIndex( $this->getArg( 0, 0 ), $this->getArg( 1, -1 ) ); + $this->populateSearchIndex( $this->getArg( 0, 0 ), $this->getArg( 1, - 1 ) ); } } -$maintClass = "UpdateMath"; +$maintClass = "GenerateFeatureTable"; +/** @noinspection PhpIncludeInspection */ require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/GenerateWorkload.php b/maintenance/GenerateWorkload.php index abaef0c..0445827 100644 --- a/maintenance/GenerateWorkload.php +++ b/maintenance/GenerateWorkload.php @@ -31,29 +31,38 @@ private $id = 0; private $selectivity = PHP_INT_MAX; + /** + * + */ public function __construct() { parent::__construct(); $this->mDescription = 'Generates a workload of sample queries.'; - $this->addOption( 'selectivity' , "Specifies the selectivity for each individual equation", false, true, "S"); - $this->addOption ( 'lastId', "Specifies to start the ID counter after the given id. For example '-l 1' would start with id 2.", false, true, "l" ); - $this->addOption ( 'overwrite', "Overwrite existing draft queries ", false, false, "o" ); + $this->addOption( 'selectivity', "Specifies the selectivity for each individual equation", + false, true, "S" ); + $this->addOption( 'lastId', + "Specifies to start the ID counter after the given id. 
For example '-l 1' would start with id 2.", + false, true, "l" ); + $this->addOption( 'overwrite', "Overwrite existing draft queries ", false, false, "o" ); } /** * @param ResultWrapper $row + * * @return string */ - protected function generateIndexString( $row ){ - if ( mt_rand() <= $this->selectivity ){ - $q = MathQueryObject::newQueryFromEquationRow($row, ++$this->id ); - $q->saveToDatabase( $this->getOption("overwrite", false) ); + protected function generateIndexString( $row ) { + if ( mt_rand() <= $this->selectivity ) { + $q = MathQueryObject::newQueryFromEquationRow( $row, ++ $this->id ); + $q->saveToDatabase( $this->getOption( "overwrite", false ) ); $out = $q->exportTexDocument(); - if( $out == false ){ - echo 'problem with ' . var_export($q,true) . "\n"; + if ( $out == false ) { + echo 'problem with ' . var_export( $q, true ) . "\n"; $out = ''; } return $out; - } else return ''; + } else { + return ''; + } } @@ -61,18 +70,17 @@ $i = 0; $inc = $this->getArg( 1, 100 ); $this->id = $this->getOption( 'lastId', 0 ); - $sel = $this->getOption( "selectivity", .1 ); - $this->selectivity = (int) ($sel * mt_getrandmax()) ; + $sel = $this->getOption( "selectivity", .1 ); + $this->selectivity = (int)( $sel * mt_getrandmax() ); $db = wfGetDB( DB_SLAVE ); echo "getting list of all equations from the database\n"; - $this->res = $db->select( - array( 'mathindex' ), - array( 'mathindex_revision_id', 'mathindex_anchor', 'mathindex_inputhash' ), - true - , __METHOD__ - ,array('LIMIT' => $this->getOption( 'limit', (int) (100/$sel) ) , - 'ORDER BY' => 'mathindex_inputhash' ) - ); + $this->res = + $db->select( array( 'mathindex' ), + array( 'mathindex_revision_id', 'mathindex_anchor', 'mathindex_inputhash' ), true, + __METHOD__, array( + 'LIMIT' => $this->getOption( 'limit', (int)( 100 / $sel ) ), + 'ORDER BY' => 'mathindex_inputhash' + ) ); do { $fn = $this->getArg( 0 ) . '/math' . sprintf( '%012d', $i ) . 
'.tex'; $res = $this->wFile( $fn, $i, $inc ); @@ -82,5 +90,7 @@ echo( "done" ); } } + $maintClass = "GenerateWorkload"; +/** @noinspection PhpIncludeInspection */ require_once( RUN_MAINTENANCE_IF_MAIN ); \ No newline at end of file diff --git a/maintenance/IndexBase.php b/maintenance/IndexBase.php index e87bca9..9583f96 100644 --- a/maintenance/IndexBase.php +++ b/maintenance/IndexBase.php @@ -1,6 +1,6 @@ <?php /** - * Generates harvest files for the MathWebSearch Deamon. + * Generates harvest files for the MathWebSearch Daemon. * Example: php CreateMathIndex.php ~/mws_harvest_files * * This program is free software; you can redistribute it and/or modify @@ -28,6 +28,7 @@ * */ abstract class IndexBase extends Maintenance { + /** @type ResultWrapper */ protected $res; /** @@ -38,30 +39,35 @@ $this->mDescription = 'Exports data'; $this->addArg( 'dir', 'The directory where the harvest files go to.' ); $this->addArg( 'ffmax', "The maximal number of formula per file.", false ); - $this->addArg( 'min', "If set processing is started at the page with rank(pageID)>min", false ); - $this->addArg( 'max', "If set processing is stopped at the page with rank(pageID)<=max", false ); - $this->addOption( 'limit', 'The maximal number of database entries to be considered', false ,true , "L"); + $this->addArg( 'min', "If set processing is started at the page with rank(pageID)>min", + false ); + $this->addArg( 'max', "If set processing is stopped at the page with rank(pageID)<=max", + false ); + $this->addOption( 'limit', 'The maximal number of database entries to be considered', false, + true, "L" ); } /** - * @param unknown $row + * @param stdClass $row + * * @return string */ protected abstract function generateIndexString( $row ); /** - * @param unknown $fn - * @param unknown $min - * @param unknown $inc + * @param string $fn + * @param int $min + * @param int $inc + * * @return boolean */ protected function wFile( $fn, $min, $inc ) { $out = $this->getHead(); $max = min( $min + 
$inc, $this->res->numRows() ); - for ( $i = $min; $i < $max; $i++ ) { + for ( $i = $min; $i < $max; $i ++ ) { $this->res->seek( $i ); $out .= $this->generateIndexString( $this->res->fetchObject() ); - restore_error_handler ( ); + restore_error_handler(); } $out .= "\n" . $this->getFooter(); $fh = fopen( $fn, 'w' ); @@ -70,10 +76,11 @@ fwrite( $fh, $out ); fclose( $fh ); echo "written file $fn with entries($min ... $max)\n"; - if ( $max < $this->res->numRows() -1 ) + if ( $max < $this->res->numRows() - 1 ) { return true; - else + } else { return false; + } } /** @@ -85,17 +92,21 @@ $inc = $this->getArg( 1, 100 ); $db = wfGetDB( DB_SLAVE ); echo "getting list of all equations from the database\n"; - $this->res = $db->select( - array( 'mathindex', 'mathlatexml' ), - array( 'mathindex_revision_id', 'mathindex_anchor', 'math_mathml', 'math_inputhash', 'mathindex_inputhash' ), - array( 'math_inputhash = mathindex_inputhash', - 'mathindex_revision_id >= '. $this->getArg( 2, 0), - 'mathindex_revision_id <= '. $this->getArg( 3, PHP_INT_MAX)) - , __METHOD__ - ,array( - 'LIMIT' => $this->getOption( 'limit', PHP_INT_MAX ) , - 'ORDER BY' => 'mathindex_revision_id' ) - ); + $this->res = + $db->select( array( 'mathindex', 'mathlatexml' ), array( + 'mathindex_revision_id', + 'mathindex_anchor', + 'math_mathml', + 'math_inputhash', + 'mathindex_inputhash' + ), array( + 'math_inputhash = mathindex_inputhash', + 'mathindex_revision_id >= ' . $this->getArg( 2, 0 ), + 'mathindex_revision_id <= ' . $this->getArg( 3, PHP_INT_MAX ) + ), __METHOD__, array( + 'LIMIT' => $this->getOption( 'limit', PHP_INT_MAX ), + 'ORDER BY' => 'mathindex_revision_id' + ) ); echo "write " . $this->res->numRows() . " results to index\n"; do { $fn = $this->getArg( 0 ) . '/math' . sprintf( '%012d', $i ) . 
'.xml'; @@ -104,10 +115,18 @@ } while ( $res ); echo( "done" ); } - protected function getHead(){ + + /** + * @return string + */ + protected function getHead() { return ""; } - protected function getFooter(){ + + /** + * @return string + */ + protected function getFooter() { return ""; } } diff --git a/maintenance/MathMLFilter.php b/maintenance/MathMLFilter.php index 9abf1c9..44e807c 100644 --- a/maintenance/MathMLFilter.php +++ b/maintenance/MathMLFilter.php @@ -20,17 +20,20 @@ */ # Alert the user that this is not a valid entry point to MediaWiki if they try to access the special pages file directly. if ( !defined( 'MEDIAWIKI' ) ) { - die( "This is not a valid entry point to MediaWiki.\n" - . "To run the script use:\n" - . 'php ../../../maintenance/dumpBackup.php --current --plugin=MathMLFilter:./MathDump.php --filter=mathml' - . "\n" ); + die( "This is not a valid entry point to MediaWiki.\n" . "To run the script use:\n" . + 'php ../../../maintenance/dumpBackup.php --current --plugin=MathMLFilter:./MathDump.php --filter=mathml' . + "\n" ); } /** * Simple dump output filter to exclude all talk pages. 
+ * * @ingroup Dump */ class MathMLFilter extends DumpFilter { + /** + * @param $backupDumper + */ public static function register( $backupDumper ) { $backupDumper->registerFilter( 'mathml', 'MathMLFilter' ); @@ -38,7 +41,9 @@ /** * Callback function that replaces TeX by MathML + * * @param array $match + * * @return string */ private static function renderMath( $match ) { @@ -51,19 +56,22 @@ } /** - * Replaces the math tags with rendered Mathml - * @param unknown $pText + * Replaces the math tags with rendered MathML + * + * @param string $pText + * * @return string */ private static function replaceMath( $pText ) { $pText = Sanitizer::removeHTMLcomments( $pText ); - return preg_replace_callback( "#<math>(.*?)</math>#s", 'self::renderMath', $pText ); + return preg_replace_callback( "#<math>(.*?)</math>#s", 'self::renderMath', + $pText ); } /** - * @param $rev - * @param $string the revision text + * @param object $rev + * @param string $string the revision text */ function writeRevision( $rev, $string ) { if ( $this->sendingThisPage ) { diff --git a/maintenance/UpdateMath.php b/maintenance/UpdateMath.php index e31926c..6f6e809 100644 --- a/maintenance/UpdateMath.php +++ b/maintenance/UpdateMath.php @@ -21,6 +21,9 @@ require_once( dirname( __FILE__ ) . 
'/../../../maintenance/Maintenance.php' ); +/** + * Class UpdateMath + */ class UpdateMath extends Maintenance { const RTI_CHUNK_SIZE = 100; public $purge = false; @@ -28,56 +31,75 @@ private $verbose; /** @var DatabaseBase */ public $dbw = null; - /** @var MathRenderer */ + /** @var MathRenderer */ private $current; private $time = 0;//microtime( true ); private $performance = array(); - private $renderingMode = MW_MATH_LATEXML; + private $renderingMode = MW_MATH_LATEXML; /** * @var DatabaseBase */ private $db; + /** * */ public function __construct() { - $this->verbose = $this->verbose; parent::__construct(); $this->mDescription = 'Updates the index of Mathematical formulae.'; - $this->addOption( 'purge', "If set all formulae are rendered again without using caches. (Very time consuming!)", false, false, "f" ); - $this->addArg( 'min', "If set processing is started at the page with rank(pageID)>min", false ); - $this->addArg( 'max', "If set processing is stopped at the page with rank(pageID)<=max", false ); - $this->addOption( 'verbose', "If set output for successful rendering will produced",false,false,'v' ); + $this->addOption( 'purge', + "If set all formulae are rendered again without using caches. 
(Very time consuming!)", + false, false, "f" ); + $this->addArg( 'min', "If set processing is started at the page with rank(pageID)>min", + false ); + $this->addArg( 'max', "If set processing is stopped at the page with rank(pageID)<=max", + false ); + $this->addOption( 'verbose', "If set output for successful rendering will produced", false, + false, 'v' ); $this->addOption( 'SVG', "If set SVG images will be produced", false, false ); $this->addOption( 'hoooks', "If set hooks will be skipped", false, false ); $this->addOption( 'texvccheck', "If set texvccheck will be skipped", false, false ); - $this->addOption( 'mode' , 'Rendering mode to be used (0 = PNG, 5= MathML, 7=MathML)',false,true,'m'); + $this->addOption( 'mode', 'Rendering mode to be used (0 = PNG, 5= MathML, 7=MathML)', false, + true, 'm' ); } - private function time($category='default'){ + + /** + * @param string $category + * + * @return int + */ + private function time( $category = 'default' ) { global $wgMathDebug; - $delta = (microtime(true) - $this->time)*1000; - if (isset ($this->performance[$category] )) + $delta = ( microtime( true ) - $this->time ) * 1000; + if ( isset ( $this->performance[$category] ) ) { $this->performance[$category] += $delta; - else + } else { $this->performance[$category] = $delta; - if($wgMathDebug){ - $this->db->insert('mathperformance',array( + } + if ( $wgMathDebug ) { + $this->db->insert( 'mathperformance', array( 'math_inputhash' => $this->current->getInputHash(), - 'mathperformance_name' => substr($category,0,10), + 'mathperformance_name' => substr( $category, 0, 10 ), 'mathperformance_time' => $delta, - 'mathperformance_mode' => $this->renderingMode - )); + 'mathperformance_mode' => $this->renderingMode + ) ); } - $this->time = microtime(true); + $this->time = microtime( true ); - return (int) $delta; + return (int)$delta; } + /** * Populates the search index with content from all pages + * + * @param int $n + * @param int $cmax + * + * @throws DBUnexpectedError 
*/ - protected function populateSearchIndex( $n = 0, $cmax = -1 ) { + protected function populateSearchIndex( $n = 0, $cmax = - 1 ) { $res = $this->db->select( 'page', 'MAX(page_id) AS count' ); $s = $this->db->fetchObject( $res ); $count = $s->count; @@ -93,25 +115,28 @@ } $end = $n + self::RTI_CHUNK_SIZE - 1; - $res = $this->db->select( array( 'page', 'revision', 'text' ), + $res = + $this->db->select( array( 'page', 'revision', 'text' ), array( 'page_id', 'page_namespace', 'page_title', 'old_flags', 'old_text' ), - array( "page_id BETWEEN $n AND $end", 'page_latest = rev_id', 'rev_text_id = old_id' ), - __METHOD__ - ); + array( + "page_id BETWEEN $n AND $end", + 'page_latest = rev_id', + 'rev_text_id = old_id' + ), __METHOD__ ); $this->dbw->begin(); // echo "before" +$this->dbw->selectField('mathindex', 'count(*)')."\n"; $i = $n; foreach ( $res as $s ) { echo "\np$i:"; $revtext = Revision::getRevisionText( $s ); - $fcount += $this->doUpdate( $s->page_id, $revtext, $s->page_title); - $i++; + $fcount += $this->doUpdate( $s->page_id, $revtext, $s->page_title ); + $i ++; } // echo "before" +$this->dbw->selectField('mathindex', 'count(*)')."\n"; $start = microtime( true ); $this->dbw->commit(); - echo " committed in " . ( microtime( true ) -$start ) . "s\n\n"; - var_export($this->performance); + echo " committed in " . ( microtime( true ) - $start ) . 
"s\n\n"; + var_export( $this->performance ); // echo "after" +$this->dbw->selectField('mathindex', 'count(*)')."\n"; $n += self::RTI_CHUNK_SIZE; } @@ -119,14 +144,15 @@ } /** - * @param $pid - * @param unknown $pText + * @param $pid + * @param string $pText * @param string $pTitle + * * @internal param unknown $pId * @internal param string $purge * @return number */ - private function doUpdate( $pid, $pText, $pTitle = "") { + private function doUpdate( $pid, $pText, $pTitle = "" ) { $notused = ''; // TODO: fix link id problem $anchorID = 0; @@ -135,22 +161,23 @@ if ( $matches ) { echo( "\t processing $matches math fields for {$pTitle} page\n" ); foreach ( $math as $formula ) { - $this->time = microtime(true); - $renderer = MathRenderer::getRenderer( $formula[1], $formula[2], $this->renderingMode ); + $this->time = microtime( true ); + $renderer = + MathRenderer::getRenderer( $formula[1], $formula[2], $this->renderingMode ); $this->current = $renderer; - $this->time("loadClass"); + $this->time( "loadClass" ); if ( $this->getOption( "texvccheck", false ) ) { $checked = true; } else { $checked = $renderer->checkTex(); - $this->time("checkTex"); + $this->time( "checkTex" ); } if ( $checked ) { $renderer->render( $this->purge ); - if( $renderer->getMathml() ){ - $this->time("Rendering"); + if ( $renderer->getMathml() ) { + $this->time( "Rendering" ); } else { - $this->time("Failing"); + $this->time( "Failing" ); } if ( $this->getOption( "SVG", false ) ) { $svg = $renderer->getSvg(); @@ -161,25 +188,27 @@ } } } else { - $this->time("checkTex-Fail"); - echo "\nF:\t\t".$renderer->getMd5()." texvccheck error:" . $renderer->getLastError(); + $this->time( "checkTex-Fail" ); + echo "\nF:\t\t" . $renderer->getMd5() . " texvccheck error:" . + $renderer->getLastError(); continue; } - if ( ! 
$this->getOption( "hooks", false ) ) { - wfRunHooks( 'MathFormulaRendered', array( &$renderer, &$notused, $pid, $anchorID ) ); + if ( !$this->getOption( "hooks", false ) ) { + wfRunHooks( 'MathFormulaRendered', + array( &$renderer, &$notused, $pid, $anchorID ) ); $this->time( "hooks" ); - $anchorID++; + $anchorID ++; } - $renderer->writeCache($this->dbw); - $this->time("write Cache"); + $renderer->writeCache( $this->dbw ); + $this->time( "write Cache" ); if ( $renderer->getLastError() ) { - echo "\n\t\t". $renderer->getLastError() ; - echo "\nF:\t\t".$renderer->getMd5()." equation " . ( $anchorID -1 ) . - "-failed beginning with\n\t\t'" . substr( $formula, 0, 100 ) - . "'\n\t\tmathml:" . substr($renderer->getMathml(),0,10) ."\n "; - } else{ - if($this->verbose){ - echo "\nS:\t\t".$renderer->getMd5(); + echo "\n\t\t" . $renderer->getLastError(); + echo "\nF:\t\t" . $renderer->getMd5() . " equation " . ( $anchorID - 1 ) . + "-failed beginning with\n\t\t'" . substr( $formula, 0, 100 ) . + "'\n\t\tmathml:" . substr( $renderer->getMathml(), 0, 10 ) . "\n "; + } else { + if ( $this->verbose ) { + echo "\nS:\t\t" . 
$renderer->getMd5(); } } } @@ -187,6 +216,7 @@ } return 0; } + /** * */ @@ -194,15 +224,16 @@ global $wgMathValidModes; $this->dbw = wfGetDB( DB_MASTER ); $this->purge = $this->getOption( "purge", false ); - $this->verbose = $this->getOption("verbose",false); - $this->renderingMode = $this->getOption( "mode" , 7); + $this->verbose = $this->getOption( "verbose", false ); + $this->renderingMode = $this->getOption( "mode", 7 ); $this->db = wfGetDB( DB_MASTER ); $wgMathValidModes[] = $this->renderingMode; $this->output( "Loaded.\n" ); $this->time = microtime( true ); - $this->populateSearchIndex( $this->getArg( 0, 0 ), $this->getArg( 1, -1 ) ); + $this->populateSearchIndex( $this->getArg( 0, 0 ), $this->getArg( 1, - 1 ) ); } } $maintClass = "UpdateMath"; +/** @noinspection PhpIncludeInspection */ require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/batch.sh b/maintenance/batch.sh index ece28dd..6cbce26 100644 --- a/maintenance/batch.sh +++ b/maintenance/batch.sh @@ -1,9 +1,9 @@ #!/bin/sh i=0 -while [ $i -le 28 ] +while [ ${i} -le 28 ] do - j=`expr $i + 1` - echo $i - php UpdateMath.php ${i}000 ${j}000 -f >$i& - i=$j + j=`expr ${i} + 1` + echo ${i} + php UpdateMath.php ${i}000 ${j}000 -f >${i}& + i=${j} done \ No newline at end of file diff --git a/maintenance/ibm_driver.sh b/maintenance/ibm_driver.sh index 097860a..5cf8b2e 100755 --- a/maintenance/ibm_driver.sh +++ b/maintenance/ibm_driver.sh @@ -1,4 +1,4 @@ -#/bin/bash +#!/bin/bash echo "Warning: Experimental don't use in any kind of production environment." echo "Make sure that you have downloaded and extracted the" echo "Data Server Driver Package (dsdriver) to /vagrant/ibm/dsdriver" @@ -6,7 +6,7 @@ echo "Are the drivers downloaded and extracted?" 
select yn in "Yes" "No" do - case $yn in + case ${yn} in Yes ) break;; No ) exit;; esac -- To view, visit https://gerrit.wikimedia.org/r/186633 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I39cc729f79b7045e712b4079d8497d065dc5fdad Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/MathSearch Gerrit-Branch: master Gerrit-Owner: Physikerwelt <w...@physikerwelt.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits