Physikerwelt has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/200100

Change subject: Live index update
......................................................................

Live index update

* use onPageContentSaveComplete hook
* still quite prototypical

Change-Id: Iff6e07e2276f32c11f4b828e55ba9e27bd33aa5d
---
M MathSearch.hooks.php
M MathSearch.php
A includes/MwsDumpWriter.php
M includes/engines/MathEngineBaseX.php
M includes/engines/MathEngineRest.php
M maintenance/CreateMWSHarvest.php
M maintenance/UpdateMath.php
A tests/MwsDumpWriterTest.php
8 files changed, 464 insertions(+), 79 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/MathSearch 
refs/changes/00/200100/1

diff --git a/MathSearch.hooks.php b/MathSearch.hooks.php
index de78e6d..83ae417 100644
--- a/MathSearch.hooks.php
+++ b/MathSearch.hooks.php
@@ -1,11 +1,11 @@
 <?php
+
 /**
  * MediaWiki MathSearch extension
  *
  * (c) 2012 various MediaWiki contributors
  * GPLv2 license; info in main package.
  */
-
 class MathSearchHooks {
        static $nextID = 0;
 
@@ -35,29 +35,31 @@
                if ( is_null( $updater ) ) {
                        throw new MWException( "Mathsearch extension requires 
Mediawiki 1.18 or above" );
                }
-               $type = $updater->getDB()->getType();
-               if ( $type == "mysql"  ) {
-                       $dir = __DIR__ . '/db/' ;
+               $type = $updater->getDB()->getType();
+               if ( $type == "mysql" ) {
+                       $dir = __DIR__ . '/db/';
                        $updater->addExtensionTable( 'mathindex', $dir . 
'mathindex.sql' );
-                       $updater->addExtensionTable( 'mathobservation',  $dir . 
'mathobservation.sql' );
+                       $updater->addExtensionTable( 'mathobservation', $dir . 
'mathobservation.sql' );
                        $updater->addExtensionTable( 'mathvarstat', $dir . 
'mathvarstat.sql' );
                        $updater->addExtensionTable( 'mathrevisionstat', $dir . 
'mathrevisionstat.sql' );
                        $updater->addExtensionTable( 'mathsemantics', $dir . 
'mathsemantics.sql' );
                        $updater->addExtensionTable( 'mathperformance', $dir . 
'mathperformance.sql' );
                        $updater->addExtensionTable( 'mathidentifier', $dir . 
'mathidentifier.sql' );
-                       if ( $wgMathWmcServer ){
+                       if ( $wgMathWmcServer ) {
                                $wmcDir = $dir . 'wmc/persistent/';
-                               $updater->addExtensionTable( 'math_wmc_ref', 
$wmcDir . "math_wmc_ref.sql");
-                               $updater->addExtensionTable( 'math_wmc_runs', 
$wmcDir . "math_wmc_runs.sql");
-                               $updater->addExtensionTable( 
'math_wmc_results', $wmcDir . "math_wmc_results.sql");
-                               $updater->addExtensionTable( 
'math_wmc_assessed_formula', $wmcDir . "math_wmc_assessed_formula.sql");
-                               $updater->addExtensionTable( 
'math_wmc_assessed_revision', $wmcDir . "math_wmc_assessed_revision.sql");
+                               $updater->addExtensionTable( 'math_wmc_ref', 
$wmcDir . "math_wmc_ref.sql" );
+                               $updater->addExtensionTable( 'math_wmc_runs', 
$wmcDir . "math_wmc_runs.sql" );
+                               $updater->addExtensionTable( 
'math_wmc_results', $wmcDir . "math_wmc_results.sql" );
+                               $updater->addExtensionTable( 
'math_wmc_assessed_formula',
+                                       $wmcDir . 
"math_wmc_assessed_formula.sql" );
+                               $updater->addExtensionTable( 
'math_wmc_assessed_revision',
+                                       $wmcDir . 
"math_wmc_assessed_revision.sql" );
                        }
-               } elseif ( $type == 'sqlite' ){
+               } elseif ( $type == 'sqlite' ) {
                        // Don't scare Jenkins with an exception.
                } else {
-                       throw new Exception( "Math extension does not currently 
support $type database." );
-               }
+                       throw new Exception( "Math extension does not currently 
support $type database." );
+               }
                return true;
        }
 
@@ -65,8 +67,8 @@
         * Checks if the db2 php client is installed
         * @return boolean
         */
-       public static function isDB2Supported(){
-               if ( function_exists('db2_connect') ){
+       public static function isDB2Supported() {
+               if ( function_exists( 'db2_connect' ) ) {
                        return true;
                } else {
                        return false;
@@ -81,7 +83,7 @@
         * @param string $inputHash hash of tex string (used as database entry)
         * @param string $tex the user input hash
         */
-       private static function updateIndex($revId, $eid, $inputHash, $tex){
+       private static function updateIndex( $revId, $eid, $inputHash, $tex ) {
                try {
                        $dbr = wfGetDB( DB_SLAVE );
                        $exists = $dbr->selectRow( 'mathindex',
@@ -89,15 +91,17 @@
                                array(
                                        'mathindex_revision_id' => $revId,
                                        'mathindex_anchor' => $eid,
-                                       'mathindex_inputhash' => $inputHash)
-                       ) ;
+                                       'mathindex_inputhash' => $inputHash
+                               )
+                       );
                        if ( $exists ) {
-                               wfDebugLog( 'MathSearch', 'Index $' . $tex . '$ 
already in database.');
-                               wfDebugLog( 'MathSearch', "$revId-$eid with 
hash ". bin2hex($inputHash) );
+                               wfDebugLog( 'MathSearch', 'Index $' . $tex . '$ 
already in database.' );
+                               wfDebugLog( 'MathSearch', "$revId-$eid with 
hash " . bin2hex( $inputHash ) );
                        } else {
                                self::writeMathIndex( $revId, $eid, $inputHash, 
$tex );
                        }
-               } catch ( Exception $e ) {
+               }
+               catch ( Exception $e ) {
                        wfDebugLog( "MathSearch", 'Problem writing to math 
index!'
                                . ' You might want the rebuild the index by 
running:'
                                . '"php 
extensions/MathSearch/ReRenderMath.php". The error is'
@@ -117,15 +121,15 @@
         * @return bool true if an ID has been assigned manually,
         * false if the automatic fallback math{$id} was used.
         */
-       public static function setMathId( &$id, MathRenderer $renderer, $revId) 
{
-               if ( $renderer->getID() ){
+       public static function setMathId( &$id, MathRenderer $renderer, $revId 
) {
+               if ( $renderer->getID() ) {
                        $id = $renderer->getID();
                        return true;
                } else {
-                       if ( is_null( $id ) ){
-                               $id = self::$nextID++;
+                       if ( is_null( $id ) ) {
+                               $id = self::$nextID ++;
                                $id = self::generateMathAnchorString( $revId, 
$id, '' );
-                               $renderer->setID($id);
+                               $renderer->setID( $id );
                        }
                        return false;
                }
@@ -143,10 +147,12 @@
                if ( $revId > 0 ) { // Only store something if a pageid was set.
                        // Use manually assigned IDs whenever possible
                        // and fallback to automatic IDs otherwise.
-                       if ( self::setMathId( $eid , $renderer, $revId ) === 
false  ){
-                               $Result = preg_replace( 
'/(class="mwe-math-mathml-(inline|display))/', "id=\"$eid\" \\1", $Result );
+                       if ( self::setMathId( $eid, $renderer, $revId ) === 
false ) {
+                               $Result =
+                                       preg_replace( 
'/(class="mwe-math-mathml-(inline|display))/', "id=\"$eid\" \\1",
+                                               $Result );
                        }
-                       self::updateIndex( $revId , $eid , 
$renderer->getInputHash() , $renderer->getTex() );
+                       self::updateIndex( $revId, $eid, 
$renderer->getInputHash(), $renderer->getTex() );
                }
                return true;
        }
@@ -158,13 +164,15 @@
         * @param string|null $Result reference to the rendering result
         * @return bool
         */
-       static function addIdentifierDescription( Parser $parser, MathRenderer 
$renderer, &$Result = null ) {
+       static function addIdentifierDescription( Parser $parser, MathRenderer 
$renderer,
+               &$Result = null ) {
                $revId = $parser->getRevisionId();
-               self::setMathId( $eid , $renderer, $revId );
-               $mo = MathObject::cloneFromRenderer($renderer);
-               $mo->setRevisionID($revId);
-               $mo->setID($eid);
-               $Result = preg_replace_callback("#<(mi|mo)( 
([^>].*?))?>(.*?)</\\1>#u", array( $mo , 'addIdentifierTitle' ), $Result);
+               self::setMathId( $eid, $renderer, $revId );
+               $mo = MathObject::cloneFromRenderer( $renderer );
+               $mo->setRevisionID( $revId );
+               $mo->setID( $eid );
+               $Result = preg_replace_callback( "#<(mi|mo)( 
([^>].*?))?>(.*?)</\\1>#u",
+                       array( $mo, 'addIdentifierTitle' ), $Result );
                return true;
        }
 
@@ -175,10 +183,14 @@
         * @param string|null $Result reference to the rendering result
         * @return bool
         */
-       static function addLinkToFormulaInfoPage( Parser $parser, MathRenderer 
$renderer, &$Result = null ) {
+       static function addLinkToFormulaInfoPage( Parser $parser, MathRenderer 
$renderer,
+               &$Result = null ) {
                $revId = $parser->getRevisionId();
-               self::setMathId( $eid , $renderer, $revId );
-               $url = SpecialPage::getTitleFor( 'FormulaInfo' )->getLocalUrl( 
array( 'pid' => $revId, 'eid' => $eid ) );
+               self::setMathId( $eid, $renderer, $revId );
+               $url = SpecialPage::getTitleFor( 'FormulaInfo' )->getLocalUrl( 
array(
+                       'pid' => $revId,
+                       'eid' => $eid
+               ) );
                $Result = "<span><a href=\"$url\" id=\"$eid\" 
style=\"color:inherit;\">$Result</a></span>";
                return true;
        }
@@ -195,9 +207,10 @@
         * @param null $Result
         * @return bool
         */
-       static function onMathFormulaRenderedNoLink( Parser $parser, 
MathRenderer $renderer, &$Result = null ) {
+       static function onMathFormulaRenderedNoLink( Parser $parser, 
MathRenderer $renderer,
+               &$Result = null ) {
                $revId = $parser->getRevisionId();
-               self::setMathId($eid, $renderer, $revId);
+               self::setMathId( $eid, $renderer, $revId );
                if ( $revId > 0 ) { // Only store something if a pageid was set.
                        self::updateIndex( $revId, $eid, 
$renderer->getInputHash(), $renderer->getTex() );
                }
@@ -221,15 +234,16 @@
                return true;
        }
 
-       static function generateMathAnchorString($revId, $anchorID, $prefix = 
"#"){
+       static function generateMathAnchorString( $revId, $anchorID, $prefix = 
"#" ) {
                $result = "{$prefix}math.$revId.$anchorID";
-               Hooks::run( "MathSearchGenerateAnchorString" , array( $revId, 
$anchorID, $prefix, &$result ) );
+               Hooks::run( "MathSearchGenerateAnchorString",
+                       array( $revId, $anchorID, $prefix, &$result ) );
                return $result;
        }
 
        /**
-        * @param int    $oldID
-        * @param int    $eid
+        * @param int $oldID
+        * @param int $eid
         * @param string $inputHash
         * @param string $tex
         */
@@ -238,10 +252,10 @@
                $dbw = wfGetDB( DB_MASTER );
                $dbw->onTransactionIdle( function () use ( $oldID, $eid, 
$inputHash, $dbw ) {
                        $dbw->replace( 'mathindex', array( 
'mathindex_revision_id', 'mathindex_anchor' ), array(
-                                       'mathindex_revision_id' => $oldID,
-                                       'mathindex_anchor' => $eid,
-                                       'mathindex_inputhash' => $inputHash
-                               ) );
+                               'mathindex_revision_id' => $oldID,
+                               'mathindex_anchor' => $eid,
+                               'mathindex_inputhash' => $inputHash
+                       ) );
                } );
        }
 
@@ -253,7 +267,7 @@
         */
        static function onParserFirstCallInit( $parser ) {
                $parser->setHook( 'mquery', array( 'MathSearchHooks', 
'mQueryTagHook' ) );
-               wfDebugLog('MathSearch','mquery tag registered');
+               wfDebugLog( 'MathSearch', 'mquery tag registered' );
                return true;
        }
 
@@ -270,18 +284,87 @@
                if ( trim( $content ) === '' ) { // bug 8372
                        return '';
                }
-               wfDebugLog('MathSearch','Render mquery tag.');
+               wfDebugLog( 'MathSearch', 'Render mquery tag.' );
                //TODO: Report %\n problem to LaTeXML upstream
                $content = preg_replace( '/%\n/', '', $content );
                $renderer = new MathLaTeXML( $content );
-               $mQuerySettings  = $wgMathDefaultLaTeXMLSetting;
+               $mQuerySettings = $wgMathDefaultLaTeXMLSetting;
                $mQuerySettings['preload'][] = 'mws.sty';
-               $renderer->setLaTeXMLSettings($mQuerySettings);
-               $renderer->render( );
+               $renderer->setLaTeXMLSettings( $mQuerySettings );
+               $renderer->render();
                $renderedMath = $renderer->getHtmlOutput();
                $renderer->writeCache();
 
                return array( $renderedMath, "markerType" => 'nowiki' );
        }
 
+       static function onArticleDeleteComplete() {
+
+       }
+
+
+
+       /**
+        * Occurs after the save page request has been processed.
+        * @see 
https://www.mediawiki.org/wiki/Manual:Hooks/PageContentSaveComplete
+        *
+        * @param WikiPage $article
+        * @param User $user
+        * @param Content $content
+        * @param string $summary
+        * @param boolean $isMinor
+        * @param boolean $isWatch
+        * @param $section Deprecated
+        * @param integer $flags
+        * @param Revision|null $revision
+        * @param Status $status
+        * @param integer $baseRevId
+        *
+        * @return boolean
+        */
+       public static function onPageContentSaveComplete( $article, $user, 
$content, $summary, $isMinor,
+               $isWatch, $section, $flags, $revision, $status, $baseRevId ) {
+               //TODO: Update to JOB
+               if ( $revision == null ) {
+                       wfDebugLog( 'MathSearch', "Empty update for 
{$article->getTitle()->getFullText()}." );
+                       return true;
+               }
+               $mathTags =
+                       MathObject::extractMathTagsFromWikiText( 
ContentHandler::getContentText( $content ) );
+               $revId = $revision->getId();
+               $harvest = "";
+               if ( $mathTags ) {
+                       $dw = new MwsDumpWriter();
+                       self::resetId();
+                       foreach ( $mathTags as $tag ) {
+                               $id = null;
+                               $tagContent = $tag[1];
+                               $attributes = $tag[2];
+                               // $fullElement = $tag[3];
+                               $renderer = MathRenderer::getRenderer( 
$tagContent, $attributes, MW_MATH_LATEXML );
+                               $renderer->render();
+                               self::setMathId( $id, $renderer, $revId );
+                               $dw->addMwsExpression( $renderer->getMathml(), 
$revId, $id );
+                       }
+                       $harvest = $dw->getOutput();
+               }
+               //TODO: Figure out whats wrong with 
$revision->getPrevious()->getId();
+               $prevRevId =
+                       Revision::newFromId( 
$article->getTitle()->getLatestRevID() )->getPrevious()->getId();
+               if ( $prevRevId != null ) {
+                       $baseXUpdater = new MathEngineBaseX();
+                       $res = $baseXUpdater->update( $harvest, array( 
$prevRevId ) );
+               }
+               if ( $res ) {
+                       wfDebugLog( 'MathSearch', "Update for $revId (was 
$prevRevId)  successful." );
+               } else {
+                       wfDebugLog( 'MathSearch', "Update for $revId (was 
$prevRevId , $baseRevId)  failed." );
+               }
+
+               return true;
+       }
+
+       static function resetId() {
+               self::$nextID = 0;
+       }
 }
diff --git a/MathSearch.php b/MathSearch.php
index 5a82ede..5fbbafc 100644
--- a/MathSearch.php
+++ b/MathSearch.php
@@ -48,6 +48,7 @@
 $wgAutoloadClasses['ImportCsv'] = __DIR__ . '/includes/ImportCsv.php';
 $wgAutoloadClasses['MathSearchUtils'] = __DIR__ . 
'/includes/MathSearchUtils.php';
 $wgAutoloadClasses['MathSearchTerm'] = __DIR__ . 
'/includes/MathSearchTerm.php';
+$wgAutoloadClasses['MwsDumpWriter'] = __DIR__ . '/includes/MwsDumpWriter.php';
 
 $wgMessagesDirs['MathSeach'] = __DIR__ . '/i18n';
 $wgExtensionMessagesFiles['MathSearch'] = __DIR__ . '/MathSearch.i18n.php';
@@ -73,6 +74,8 @@
 $wgHooks['MathFormulaPostRender']['addLink'] = 
'MathSearchHooks::addLinkToFormulaInfoPage';
 $wgHooks['UnitTestsList'][] = 'MathSearchHooks::onRegisterUnitTests';
 $wgHooks['ParserFirstCallInit'][] = 'MathSearchHooks::onParserFirstCallInit';
+$wgHooks['ArticleDeleteComplete'][] = 
'MathSearchHooks::onArticleDeleteComplete';
+$wgHooks['PageContentSaveComplete'][] = 
'MathSearchHooks::onPageContentSaveComplete';
 
 $wgMathSearchBaseXBackendUrl = 'http://localhost:10043/';
 
diff --git a/includes/MwsDumpWriter.php b/includes/MwsDumpWriter.php
new file mode 100644
index 0000000..5595e15
--- /dev/null
+++ b/includes/MwsDumpWriter.php
@@ -0,0 +1,77 @@
+<?php
+
+class MwsDumpWriter {
+       private $mwsns = 'mws:';
+       private $XMLHead;
+       private $XMLFooter;
+       private $outBuffer = '';
+
+       public function __construct() {
+               $this->InitializeHeader();
+       }
+
+       public function InitializeHeader( ) {
+               $ns = $this->mwsns;
+               $this->XMLHead = "<?xml version=\"1.0\"?>\n<" . $ns .
+                       'harvest xmlns:mws="http://search.mathweb.org/ns" 
xmlns:m="http://www.w3.org/1998/Math/MathML">';
+               $this->XMLFooter = '</' . $ns . 'harvest>';
+       }
+
+       /**
+        * @param stdClass $row
+        *
+        * @return string
+        */
+       public function generateIndexString( $row ) {
+               $xml = simplexml_load_string( utf8_decode( $row->math_mathml ) 
);
+               if ( !$xml ) {
+                       echo "ERROR while converting:\n " . var_export( 
$row->math_mathml, true ) . "\n";
+                       foreach ( libxml_get_errors() as $error ) {
+                               echo "\t", $error->message;
+                       }
+                       libxml_clear_errors();
+                       return '';
+               }
+               return $this->getMwsExpression( utf8_decode( $row->math_mathml 
),
+                       $row->mathindex_revision_id, $row->mathindex_anchor  );
+
+       }
+
+       public function getHead() {
+               return $this->XMLHead;
+       }
+
+       public function getFooter() {
+               return $this->XMLFooter;
+       }
+
+       public function getMwsExpression( $mathML, $revId, $eId ) {
+               $out = "\n<" . $this->mwsns . "expr 
url=\"${revId}#${eId}\">\n\t";
+               $out .= $mathML;
+               $out .= "\n</" . $this->mwsns . "expr>\n";
+               return $out;
+       }
+       public function addMwsExpression( $mathML, $revId, $eId ) {
+               $this->outBuffer .= $this->getMwsExpression( $mathML, $revId, 
$eId );
+               return true;
+       }
+
+
+       /**
+        * @return string
+        */
+       public function getMwsns() {
+               return $this->mwsns;
+       }
+
+       /**
+        * @param string $mwsns
+        */
+       public function setMwsns( $mwsns ) {
+               $this->mwsns = $mwsns;
+       }
+
+       public function getOutput(){
+               return $this->getHead() . $this->outBuffer . $this->getFooter();
+       }
+}
\ No newline at end of file
diff --git a/includes/engines/MathEngineBaseX.php 
b/includes/engines/MathEngineBaseX.php
index 47fc0d6..3c1be83 100644
--- a/includes/engines/MathEngineBaseX.php
+++ b/includes/engines/MathEngineBaseX.php
@@ -51,9 +51,13 @@
        function processMathResults( $xmlRoot ) {
                foreach ( $xmlRoot->children( )->children() as $page ) {
                        $attrs = $page->attributes();
-                       $uri = explode( ".", $attrs["id"] );
-                       $revisionID = $uri[1];
-                       $AnchorID = $uri[2];
+                       $uri = explode( "#", $attrs["id"] );
+                       if ( sizeof($uri) != 2 ) {
+                               wfDebugLog('MathSearch','Can not parse'. 
$attrs['id']);
+                               continue;
+                       }
+                       $revisionID = $uri[0];
+                       $AnchorID = $uri[1];
                        $this->relevanceMap[] = $revisionID;
                        $substarr = array();
                        //TODO: Add hit support.
@@ -69,4 +73,19 @@
        function getPostData( $numProcess ){
                return json_encode( array( "type" => $this->type, "query" => 
$this->query->getCQuery()) );
        }
+
+       function update( $harvest = "", array $delte=array() ){
+               global $wgMathSearchBaseXBackendUrl;
+               $json_payload = json_encode( array( "harvest" => $harvest, 
"delete" => $delte) );
+               $res = self::doPost( $wgMathSearchBaseXBackendUrl. 
'api/update', $json_payload);
+               if($res){
+                       $resJson = json_decode($res);
+                       if ($resJson->success==true){
+                               return true;
+                       } else {
+                               wfDebugLog("MathSearch", "harvest update 
failed" . var_export($resJson,true));
+                       }
+               }
+               return false;
+       }
 }
\ No newline at end of file
diff --git a/includes/engines/MathEngineRest.php 
b/includes/engines/MathEngineRest.php
index 1b4d3b1..4b8a2b5 100644
--- a/includes/engines/MathEngineRest.php
+++ b/includes/engines/MathEngineRest.php
@@ -19,6 +19,29 @@
        /** @type string */
        protected $backendUrl = "http://localhost:9090";
 
+       protected static function doPost( $url, $postData ) {
+               $res = Http::post( $url, array( "postData" => $postData, 
"timeout" => 60 ) );
+               if ( $res === false ) {
+                       if ( function_exists( 'curl_init' ) ) {
+                               $handle = curl_init();
+                               $options = array(
+                                       CURLOPT_URL => $url,
+                                       CURLOPT_CUSTOMREQUEST => 'POST', // GET 
POST PUT PATCH DELETE HEAD OPTIONS
+                               );
+                               // TODO: Figure out how not to write the error 
in a message and not in top of the output page
+                               curl_setopt_array( $handle, $options );
+                               $details = curl_exec( $handle );
+                       } else {
+                               $details = "curl is not installed.";
+                       }
+                       wfDebugLog( "MathSearch", "Nothing retreived from $url. 
Check if server is running. Error:" .
+                               var_export( $details, true ) );
+                       return false;
+               } else {
+                       return $res;
+               }
+       }
+
        /**
         * @return string
         */
@@ -80,25 +103,12 @@
                global $wgMathDebug;
                $numProcess = 30000;
                $postData = $this->getPostData( $numProcess );
-               $res = Http::post( $this->backendUrl, array( "postData" => 
$postData, "timeout" => 60 ) );
-               if ( $res === false ) {
-                       if ( function_exists( 'curl_init' ) ) {
-                               $handle = curl_init();
-                               $options = array(
-                                       CURLOPT_URL => $this->backendUrl,
-                                       CURLOPT_CUSTOMREQUEST => 'POST', // GET 
POST PUT PATCH DELETE HEAD OPTIONS
-                               );
-                               // TODO: Figure out how not to write the error 
in a message and not in top of the output page
-                               curl_setopt_array( $handle, $options );
-                               $details = curl_exec( $handle );
-                       } else {
-                               $details = "curl is not installed.";
-                       }
-                       wfDebugLog( "MathSearch", "Nothing retreived from 
$this->backendUrl. Check if mwsd is running. Error:" .
-                                       var_export( $details, true ) );
+               $res = self::doPost($this->backendUrl,$postData);
+               if ( $res === false ){
                        return false;
+               } else {
+                       return $this->processResults( $res, $numProcess );
                }
-               return $this->processResults( $res, $numProcess );
        }
 
        /**
diff --git a/maintenance/CreateMWSHarvest.php b/maintenance/CreateMWSHarvest.php
index ff54580..ec30e7e 100644
--- a/maintenance/CreateMWSHarvest.php
+++ b/maintenance/CreateMWSHarvest.php
@@ -41,6 +41,13 @@
                $this->addOption( 'mwsns', 'The namespace or mws normally 
"mws:"', false );
        }
 
+       public function InitializeHeader() {
+               self::$XMLHead =
+                       "<?xml version=\"1.0\"?>\n<" . self::$mwsns .
+                       'harvest xmlns:mws="http://search.mathweb.org/ns" 
xmlns:m="http://www.w3.org/1998/Math/MathML">';
+               self::$XMLFooter = '</' . self::$mwsns . 'harvest>';
+       }
+
        /**
         * @param stdClass $row
         *
@@ -85,10 +92,7 @@
         */
        public function execute() {
                self::$mwsns = $this->getOption( 'mwsns', '' );
-               self::$XMLHead =
-                       "<?xml version=\"1.0\"?>\n<" . self::$mwsns .
-                       'harvest xmlns:mws="http://search.mathweb.org/ns" 
xmlns:m="http://www.w3.org/1998/Math/MathML">';
-               self::$XMLFooter = '</' . self::$mwsns . 'harvest>';
+               $this->InitializeHeader();
                parent::execute();
        }
 }
diff --git a/maintenance/UpdateMath.php b/maintenance/UpdateMath.php
index 43d04d9..2f1dada 100644
--- a/maintenance/UpdateMath.php
+++ b/maintenance/UpdateMath.php
@@ -25,7 +25,7 @@
  * Class UpdateMath
  */
 class UpdateMath extends Maintenance {
-       const RTI_CHUNK_SIZE = 100;
+       const RTI_CHUNK_SIZE = 10000;
        public $purge = false;
        /** @var boolean */
        private $verbose;
diff --git a/tests/MwsDumpWriterTest.php b/tests/MwsDumpWriterTest.php
new file mode 100644
index 0000000..e36052b
--- /dev/null
+++ b/tests/MwsDumpWriterTest.php
@@ -0,0 +1,189 @@
+<?php
+/**
+ * Test the MwsDumpWriter class.
+ *
+ * @group MathSearch
+ */
+class MwsDumpWriterTest extends MediaWikiTestCase {
+       //TODO: update tests strategy resources etc.
+       private $testWikiText = <<<'WikiText'
+<math>
+E = m \power c 1
+</math>
+
+<math>
+E = m \power c 2
+</math>
+
+<math id=incorrect>
+E = m \power c 3
+</math>
+
+<math>
+E = m \power c 4
+</math>
+WikiText;
+
+       private $expectedOutput = <<<'XML'
+<?xml version="1.0"?>
+<mws:harvest xmlns:mws="http://search.mathweb.org/ns" 
xmlns:m="http://www.w3.org/1998/Math/MathML">
+<mws:expr url="28351#math.28351.0">
+       <math id="p1.1.m1.1" class="ltx_Math" alttext="{\displaystyle 
E=m{c^{1}}}" display="inline">
+  <semantics id="p1.1.m1.1a">
+    <mrow id="p1.1.m1.1.5" xref="p1.1.m1.1.5.cmml">
+      <mi id="p1.1.m1.1.1" xref="p1.1.m1.1.1.cmml">E</mi>
+      <mo id="p1.1.m1.1.2" xref="p1.1.m1.1.2.cmml">=</mo>
+      <mrow id="p1.1.m1.1.5.1" xref="p1.1.m1.1.5.1.cmml">
+        <mi id="p1.1.m1.1.3" xref="p1.1.m1.1.3.cmml">m</mi>
+        <mo id="p1.1.m1.1.5.1.1" xref="p1.1.m1.1.5.1.1.cmml">⁢</mo>
+        <msup id="p1.1.m1.1.4.4" xref="p1.1.m1.1.4.1.cmml">
+          <mi id="p1.1.m1.1.4.2" xref="p1.1.m1.1.4.2.cmml">c</mi>
+          <mn id="p1.1.m1.1.4.3.1" xref="p1.1.m1.1.4.3.1.cmml">1</mn>
+        </msup>
+      </mrow>
+    </mrow>
+    <annotation-xml encoding="MathML-Content" id="p1.1.m1.1b">
+      <apply id="p1.1.m1.1.5.cmml" xref="p1.1.m1.1.5">
+        <eq id="p1.1.m1.1.2.cmml" xref="p1.1.m1.1.2"></eq>
+        <ci id="p1.1.m1.1.1.cmml" xref="p1.1.m1.1.1">E</ci>
+        <apply id="p1.1.m1.1.5.1.cmml" xref="p1.1.m1.1.5.1">
+          <times id="p1.1.m1.1.5.1.1.cmml" xref="p1.1.m1.1.5.1.1"></times>
+          <ci id="p1.1.m1.1.3.cmml" xref="p1.1.m1.1.3">m</ci>
+          <apply id="p1.1.m1.1.4.1.cmml" xref="p1.1.m1.1.4.4">
+            <power id="p1.1.m1.1.4.1.1.cmml"></power>
+            <ci id="p1.1.m1.1.4.2.cmml" xref="p1.1.m1.1.4.2">c</ci>
+            <cn type="integer" id="p1.1.m1.1.4.3.1.cmml" 
xref="p1.1.m1.1.4.3.1">1</cn>
+          </apply>
+        </apply>
+      </apply>
+    </annotation-xml>
+    <annotation encoding="application/x-tex" id="p1.1.m1.1c">{\displaystyle 
E=m{c^{1}}}</annotation>
+  </semantics>
+</math>
+</mws:expr>
+
+<mws:expr url="28351#math.28351.1">
+       <math id="p1.1.m1.1" class="ltx_Math" alttext="{\displaystyle 
E=m{c^{2}}}" display="inline">
+  <semantics id="p1.1.m1.1a">
+    <mrow id="p1.1.m1.1.5" xref="p1.1.m1.1.5.cmml">
+      <mi id="p1.1.m1.1.1" xref="p1.1.m1.1.1.cmml">E</mi>
+      <mo id="p1.1.m1.1.2" xref="p1.1.m1.1.2.cmml">=</mo>
+      <mrow id="p1.1.m1.1.5.1" xref="p1.1.m1.1.5.1.cmml">
+        <mi id="p1.1.m1.1.3" xref="p1.1.m1.1.3.cmml">m</mi>
+        <mo id="p1.1.m1.1.5.1.1" xref="p1.1.m1.1.5.1.1.cmml">⁢</mo>
+        <msup id="p1.1.m1.1.4.4" xref="p1.1.m1.1.4.1.cmml">
+          <mi id="p1.1.m1.1.4.2" xref="p1.1.m1.1.4.2.cmml">c</mi>
+          <mn id="p1.1.m1.1.4.3.1" xref="p1.1.m1.1.4.3.1.cmml">2</mn>
+        </msup>
+      </mrow>
+    </mrow>
+    <annotation-xml encoding="MathML-Content" id="p1.1.m1.1b">
+      <apply id="p1.1.m1.1.5.cmml" xref="p1.1.m1.1.5">
+        <eq id="p1.1.m1.1.2.cmml" xref="p1.1.m1.1.2"></eq>
+        <ci id="p1.1.m1.1.1.cmml" xref="p1.1.m1.1.1">E</ci>
+        <apply id="p1.1.m1.1.5.1.cmml" xref="p1.1.m1.1.5.1">
+          <times id="p1.1.m1.1.5.1.1.cmml" xref="p1.1.m1.1.5.1.1"></times>
+          <ci id="p1.1.m1.1.3.cmml" xref="p1.1.m1.1.3">m</ci>
+          <apply id="p1.1.m1.1.4.1.cmml" xref="p1.1.m1.1.4.4">
+            <power id="p1.1.m1.1.4.1.1.cmml"></power>
+            <ci id="p1.1.m1.1.4.2.cmml" xref="p1.1.m1.1.4.2">c</ci>
+            <cn type="integer" id="p1.1.m1.1.4.3.1.cmml" 
xref="p1.1.m1.1.4.3.1">2</cn>
+          </apply>
+        </apply>
+      </apply>
+    </annotation-xml>
+    <annotation encoding="application/x-tex" id="p1.1.m1.1c">{\displaystyle 
E=m{c^{2}}}</annotation>
+  </semantics>
+</math>
+</mws:expr>
+
+<mws:expr url="28351#incorrect">
+       <math id="p1.1.m1.1" class="ltx_Math" alttext="{\displaystyle 
E=m{c^{3}}}" display="inline">
+  <semantics id="p1.1.m1.1a">
+    <mrow id="p1.1.m1.1.5" xref="p1.1.m1.1.5.cmml">
+      <mi id="p1.1.m1.1.1" xref="p1.1.m1.1.1.cmml">E</mi>
+      <mo id="p1.1.m1.1.2" xref="p1.1.m1.1.2.cmml">=</mo>
+      <mrow id="p1.1.m1.1.5.1" xref="p1.1.m1.1.5.1.cmml">
+        <mi id="p1.1.m1.1.3" xref="p1.1.m1.1.3.cmml">m</mi>
+        <mo id="p1.1.m1.1.5.1.1" xref="p1.1.m1.1.5.1.1.cmml">⁢</mo>
+        <msup id="p1.1.m1.1.4.4" xref="p1.1.m1.1.4.1.cmml">
+          <mi id="p1.1.m1.1.4.2" xref="p1.1.m1.1.4.2.cmml">c</mi>
+          <mn id="p1.1.m1.1.4.3.1" xref="p1.1.m1.1.4.3.1.cmml">3</mn>
+        </msup>
+      </mrow>
+    </mrow>
+    <annotation-xml encoding="MathML-Content" id="p1.1.m1.1b">
+      <apply id="p1.1.m1.1.5.cmml" xref="p1.1.m1.1.5">
+        <eq id="p1.1.m1.1.2.cmml" xref="p1.1.m1.1.2"></eq>
+        <ci id="p1.1.m1.1.1.cmml" xref="p1.1.m1.1.1">E</ci>
+        <apply id="p1.1.m1.1.5.1.cmml" xref="p1.1.m1.1.5.1">
+          <times id="p1.1.m1.1.5.1.1.cmml" xref="p1.1.m1.1.5.1.1"></times>
+          <ci id="p1.1.m1.1.3.cmml" xref="p1.1.m1.1.3">m</ci>
+          <apply id="p1.1.m1.1.4.1.cmml" xref="p1.1.m1.1.4.4">
+            <power id="p1.1.m1.1.4.1.1.cmml"></power>
+            <ci id="p1.1.m1.1.4.2.cmml" xref="p1.1.m1.1.4.2">c</ci>
+            <cn type="integer" id="p1.1.m1.1.4.3.1.cmml" 
xref="p1.1.m1.1.4.3.1">3</cn>
+          </apply>
+        </apply>
+      </apply>
+    </annotation-xml>
+    <annotation encoding="application/x-tex" id="p1.1.m1.1c">{\displaystyle 
E=m{c^{3}}}</annotation>
+  </semantics>
+</math>
+</mws:expr>
+
+<mws:expr url="28351#math.28351.2">
+       <math id="p1.1.m1.1" class="ltx_Math" alttext="{\displaystyle 
E=m{c^{4}}}" display="inline">
+  <semantics id="p1.1.m1.1a">
+    <mrow id="p1.1.m1.1.5" xref="p1.1.m1.1.5.cmml">
+      <mi id="p1.1.m1.1.1" xref="p1.1.m1.1.1.cmml">E</mi>
+      <mo id="p1.1.m1.1.2" xref="p1.1.m1.1.2.cmml">=</mo>
+      <mrow id="p1.1.m1.1.5.1" xref="p1.1.m1.1.5.1.cmml">
+        <mi id="p1.1.m1.1.3" xref="p1.1.m1.1.3.cmml">m</mi>
+        <mo id="p1.1.m1.1.5.1.1" xref="p1.1.m1.1.5.1.1.cmml">⁢</mo>
+        <msup id="p1.1.m1.1.4.4" xref="p1.1.m1.1.4.1.cmml">
+          <mi id="p1.1.m1.1.4.2" xref="p1.1.m1.1.4.2.cmml">c</mi>
+          <mn id="p1.1.m1.1.4.3.1" xref="p1.1.m1.1.4.3.1.cmml">4</mn>
+        </msup>
+      </mrow>
+    </mrow>
+    <annotation-xml encoding="MathML-Content" id="p1.1.m1.1b">
+      <apply id="p1.1.m1.1.5.cmml" xref="p1.1.m1.1.5">
+        <eq id="p1.1.m1.1.2.cmml" xref="p1.1.m1.1.2"></eq>
+        <ci id="p1.1.m1.1.1.cmml" xref="p1.1.m1.1.1">E</ci>
+        <apply id="p1.1.m1.1.5.1.cmml" xref="p1.1.m1.1.5.1">
+          <times id="p1.1.m1.1.5.1.1.cmml" xref="p1.1.m1.1.5.1.1"></times>
+          <ci id="p1.1.m1.1.3.cmml" xref="p1.1.m1.1.3">m</ci>
+          <apply id="p1.1.m1.1.4.1.cmml" xref="p1.1.m1.1.4.4">
+            <power id="p1.1.m1.1.4.1.1.cmml"></power>
+            <ci id="p1.1.m1.1.4.2.cmml" xref="p1.1.m1.1.4.2">c</ci>
+            <cn type="integer" id="p1.1.m1.1.4.3.1.cmml" 
xref="p1.1.m1.1.4.3.1">4</cn>
+          </apply>
+        </apply>
+      </apply>
+    </annotation-xml>
+    <annotation encoding="application/x-tex" id="p1.1.m1.1c">{\displaystyle 
E=m{c^{4}}}</annotation>
+  </semantics>
+</math>
+</mws:expr>
+</mws:harvest>
+XML;
+
+
+       public function testExtract() {
+               $mathTags = MathObject::extractMathTagsFromWikiText( 
$this->testWikiText ) ;
+               $revId = 28351;
+               $dw = new MwsDumpWriter();
+               MathSearchHooks::resetId();
+               foreach ( $mathTags as $tag ) {
+                       $id = null;
+                       $content = $tag[1];
+                       $attributes = $tag[2];
+                       $renderer = MathRenderer::getRenderer( $content, 
$attributes, MW_MATH_LATEXML );
+                       $renderer->render();
+                       MathSearchHooks::setMathId( $id, $renderer, $revId );
+                       $dw->addMwsExpression( $renderer->getMathml(), $revId, 
$id );
+               }
+               $this->assertEquals( $this->expectedOutput, $dw->getOutput() );
+       }
+}
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/200100
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iff6e07e2276f32c11f4b828e55ba9e27bd33aa5d
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/MathSearch
Gerrit-Branch: master
Gerrit-Owner: Physikerwelt <w...@physikerwelt.de>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to