Physikerwelt has submitted this change and it was merged. Change subject: Cleanup of Code ......................................................................
Cleanup of Code new Feature hightlight search result Change-Id: I16d90caf95250ccf0bba7169218da49b758dadde --- A .gitmodules M MathSearch.php M SpecialMathIndex.php M SpecialMathSearch.php M maintenance/CreateMathIndex.php M maintenance/MathDump.php A mws 7 files changed, 300 insertions(+), 215 deletions(-) Approvals: Physikerwelt: Verified; Looks good to me, approved diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..58eab46 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "mws"] + path = mws + url = https://github.com/physikerwelt/mws diff --git a/MathSearch.php b/MathSearch.php index 75613de..290f9bf 100644 --- a/MathSearch.php +++ b/MathSearch.php @@ -28,6 +28,7 @@ 'version' => '0.1.0', ); +$wgMWSUrl = 'http://localhost:9090/'; $dir = dirname( __FILE__ ) . '/'; $wgAutoloadClasses['MathSearchHooks'] = $dir . 'MathSearch.hooks.php'; diff --git a/SpecialMathIndex.php b/SpecialMathIndex.php index de8f1f3..9601f50 100644 --- a/SpecialMathIndex.php +++ b/SpecialMathIndex.php @@ -1,5 +1,7 @@ <?php class SpecialMathIndex extends SpecialPage { + const SCRIPT_UPDATE_MATH=0; + const SCRIPT_WRITE_INDEX=1; function __construct() { @@ -33,12 +35,47 @@ function testIndex() { $out = $this->getOutput(); $out->addWikiText('This is a test.'); - require_once dirname( __FILE__ ) .'/maintenance/UpdateMath.php'; - $updater = new UpdateMath(); - $updater->loadParamsAndArgs(null, array("max"=>2), null); - $updater->execute(); + $formDescriptor = array( + 'script' => array( + 'label' => 'Script', # What's the label of the field + 'type' => 'select', # What's the input type + 'help' => 'for example: \sin(?x^2)', + 'default' => 0, + 'options' => array( # The options available within the menu (displayed => value) + 'UpdateMath' => self::SCRIPT_UPDATE_MATH, # depends on how you see it but keys and values are kind of mixed here + 'ExportIndex' => self::SCRIPT_WRITE_INDEX, # "Option 1" is the displayed content, "1" is the value + 'something else' => 'option2id' # Hmtl Result = <option value="option2id">Option 2</option> + ) + ) + ); + $htmlForm = new HTMLForm( $formDescriptor ); # We build the HTMLForm object + $htmlForm->setSubmitText( 'Search' ); + $htmlForm->setSubmitCallback( array( get_class($this) , 'processInput' ) ); + $htmlForm->setTitle( $this->getTitle() ); + $htmlForm->setHeaderText("<h2>Select script to run</h2>"); + $htmlForm->show(); # Displaying the form } + /* We write a callback function */ + # OnSubmit Callback, here we do all the logic we want to do... + public static function processInput( $formData ) { + switch ($formData['script']) { + case self::SCRIPT_UPDATE_MATH: + require_once dirname( __FILE__ ) .'/maintenance/UpdateMath.php'; + $updater = new UpdateMath(); + $updater->loadParamsAndArgs(null, array("max"=>1), null); + $updater->execute(); + break; + case self::SCRIPT_WRITE_INDEX: + require_once dirname( __FILE__ ) .'/maintenance/CreateMathIndex.php'; + $updater = new CreateMathIndex(); + $updater->loadParamsAndArgs(null, array("mwsns"=>'mws:'), array(dirname( __FILE__ ) .'/mws/data/wiki')); + $updater->execute(); + break; + default: + break; + } + } } \ No newline at end of file diff --git a/SpecialMathSearch.php b/SpecialMathSearch.php index 967607b..5431b9f 100644 --- a/SpecialMathSearch.php +++ b/SpecialMathSearch.php @@ -1,4 +1,5 @@ <?php + /** * MediaWiki MathSearch extension * @@ -10,6 +11,7 @@ * @ingroup extensions */ class SpecialMathSearch extends SpecialPage { + var $qs; var $math_result; var $mathSearchExpr; @@ -17,168 +19,142 @@ var $numMathResults; var $numTextResults; var $mathResults; + var $mathpattern; + var $textpattern; + var $mathmlquery; + private $resultID = 0; /** * */ function __construct() { - parent::__construct( 'MathSearch' ); + parent::__construct('MathSearch'); } + /** - * + * * @return \LuceneSearch|boolean */ public static function getLucene() { - if ( class_exists( "LuceneSearch" ) ) { + if (class_exists("LuceneSearch")) { return new LuceneSearch(); } else { - wfDebugLog( "MathSearch", "Text search not possible. Class LuceneSearch is missing." ); + wfDebugLog("MathSearch", "Text search not possible. Class LuceneSearch is missing."); return false; } } + /** - * @param unknown $par + * The main function */ - function execute( $par ) { - global $wgRequest, $wgOut; - $text = ""; + public function execute($par) { + $request = $this->getRequest(); $this->setHeaders(); - $param = $wgRequest->getText( 'param' ); - $text = $wgRequest->getVal( 'text' ); - $pattern = $wgRequest->getVal( 'pattern' ); - if ( $param ) { - $pattern = htmlspecialchars_decode( $param ); } - $wgOut->addHTML( $this->searchForm( $pattern, $text ) ); - $time_start = microtime( true ); - if ( $pattern ) { - $this->render( $pattern ); - $time_end = microtime( true ); - $time = $time_end - $time_start; - wfDebugLog( "MathSearch", "math searched in $time seconds" ); - // $wgOut->addHTML(var_export($this->mathResults, true)); + $this->mathpattern = $request->getText('mathpattern'); + $this->textpattern = $request->getText('textpattern', ''); + $isEncoded = $request->getBool('isEncoded', false); + if ($isEncoded) { + $this->mathpattern = htmlspecialchars_decode($this->mathpattern); } - $text = $wgRequest->getVal( 'text' ); - - if ( $text == "" ) { - // $wgOut->addWikiText($this->searchResults($pattern)); - $mathout = ""; - // $wgOut->addWikiText(var_export($this->mathResults,true)); - if ( $this->mathResults ) - foreach ( $this->mathResults as $pageID => $page ) { - $article = Article::newFromId( $pageID ); - if ( $article ) - { - $pagename = (string)$article->getTitle(); - $wgOut->addWikiText( "[[$pagename]]:" ); - $this->DisplayMath( $pageID ); - // echo "success with pid:$pageID\n"; - } - else - echo "error with pid:$pageID update mathematical index\n"; - } - } // * - $ls = self::getLucene() ; - if ( $ls ) { - $ls->limit = 1000000; - if ( $text ) { - $sres = $ls->searchText( $text ); - if ( $sres && $sres->hasResults() ) { - $wgOut->addWikiText( "You searched for the text '$text' and the TeX-Pattern '$pattern'." ); - $wgOut->addWikiText( "The text search results in [{{canonicalurl:search|search=$text}} " . - $sres->getTotalHits() - . "] hits and the math pattern matched $this->numMathResults times on [{{canonicalurl:{{FULLPAGENAMEE}}|pattern=$pattern}} " . - sizeof( $this->relevantMathMap ) . - "] pages." ); - //// var_dump($sres); - wfDebugLog( 'mathsearch', 'BOF' ); - // $wgOut->addWikiText(var_export($this->relevantMathMap,true)); - $pageList = ""; - while ( $tres = $sres->next() ) { - $pageID = $tres->getTitle()->getArticleID(); - // $wgOut->addWikiText($pageID); - - if ( isset( $this->relevantMathMap[$pageID] ) ) { - $wgOut->addWikiText( "[[" . $tres->getTitle() . "]]" ); - $wgOut->addHtml( $tres->getTextSnippet( $text ) ); - $pageList .= "OR [[" . $pageID . "]]"; - // $wgOut->addHtml($this->showHit($tres),$text); - $this->DisplayMath( $pageID ); - } /*else { - $wgOut->addWikiText(":NO MATH"); - }//*/ - } // $tres->mHighlightTitle)} - - wfDebugLog( 'mathsearch', 'EOF' ); - wfDebugLog( 'mathsearch', var_export( $this->mathResults , true ) ); - } } + $this->searchForm(); + if ($this->mathpattern || $this->textpattern) { + $this->performSearch(); } - // $wgOut->addHtml(htmlspecialchars( $pattern) ); - $wgOut->addWikiText( "<math> $pattern </math>" ); - // dbw = wfGetDB( DB_MASTER );$dbw->encodeBlob(pack( 'H32' - // $inputhash= $dbw->encodeBlob(pack( 'H32',md5($pattern) ); - // $wgOut->addWikiText("$inputhash"); - $dbr = wfGetDB( DB_SLAVE ); - $inputhash = $dbr->encodeBlob( pack( 'H32', md5( $pattern ) ) ); - $rpage = $dbr->select( - 'mathindex', - array( 'mathindex_page_id', 'mathindex_anchor', 'mathindex_timestamp' ), - array( 'mathindex_inputhash' => $inputhash ) + } + + /** + * Generates the search input form + */ + private function searchForm() { + # A formDescriptor Array to tell HTMLForm what to build + $formDescriptor = array( + 'mathpattern' => array( + 'label' => 'LaTeX pattern', # What's the label of the field + 'class' => 'HTMLTextField', # What's the input type + 'help' => 'for example: \sin(?x^2)', + 'default' => $this->mathpattern, + ), + 'textpattern' => array( + 'label' => 'Text pattern', # What's the label of the field + 'class' => 'HTMLTextField', # What's the input type + 'help' => 'a term like: algebra', + 'default' => $this->textpattern, + ) ); - foreach ( $rpage as $row ) - wfDebugLog( "MathSearch", var_export( $row, true ) ); - /*$wt="{{#ask:".substr($pageList,2)." - | ?Dct:title - | ?Personname - | ?Dct:dateSubmitted - | ?Dct:subject - }}"; - $wgOut->addWikiText($wt); - wfDebugLog( 'mathsearch', $wt);*/ + if (!self::getLucene()) { + $formDescriptor['textpattern']['disabled'] = true; + $formDescriptor['textpattern']['help'] = 'LuceneSearch not found. Text search <b>disabled</b>!<br/> For details see <a href=\"http://www.mediawiki.org/wiki/Extension:MWSearch\">MWSearch</a>.'; + } + $htmlForm = new HTMLForm($formDescriptor); # We build the HTMLForm object + $htmlForm->setSubmitText('Search'); + $htmlForm->setSubmitCallback(array(get_class($this), 'processInput')); + $htmlForm->setTitle($this->getTitle()); + $htmlForm->setHeaderText("<h2>Input</h2>"); + $htmlForm->show(); # Displaying the form + } + /** + * Processes the submitted Form input + * @param array $formData + */ + public static function processInput($formData) { + $instance = new SpecialMathSearch(); + $instance->mathpattern = $formData['mathpattern']; + $instance->textpattern = $formData['textpattern']; + $instance->performSearch(); } /** + * Displays the equations for one page * @param unknown $pageID * @return boolean */ - function DisplayMath( $pageID ) { - global $wgOut; - $page = $this->mathResults[(string)$pageID]; - $dbr = wfGetDB( DB_SLAVE ); - $article = Article::newFromId( $pageID ); - $pagename = (string)$article->getTitle(); - wfDebugLog( "MathSearch", "Processing results for $pagename" ); - foreach ( $page as $anchorID => $answ ) { + function DisplayMath($pageID) { + global $wgDebugMath; + $out = $this->getOutput(); + $page = $this->mathResults[(string) $pageID]; + $dbr = wfGetDB(DB_SLAVE); + $article = Article::newFromId($pageID); + $pagename = (string) $article->getTitle(); + wfDebugLog("MathSearch", "Processing results for $pagename"); + foreach ($page as $anchorID => $answ) { $res = $dbr->selectRow( - array( 'mathindex', 'math' ), - array( 'math_mathml', 'mathindex_page_id', 'mathindex_anchor', - 'mathindex_inputhash', 'math_inputhash' ), - 'mathindex_page_id = "' . $pageID - . '" AND mathindex_anchor= "' . $anchorID - . '" AND mathindex_inputhash = math_inputhash' - ); - if ( $res ) { - $mml = utf8_decode( $res->math_mathml ); - $wgOut->addHtml( " " ); - $wgOut->addWikiText( "[[$pagename#math$anchorID|Eq: $anchorID]] ", false ); - $wgOut->addHtml( MathLaTeXML::embedMathML( $mml ) ); - $wgOut->addHtml( "<br />" ); + array('mathindex', 'math'), array('math_mathml', 'mathindex_page_id', 'mathindex_anchor', + 'mathindex_inputhash', 'math_inputhash'), 'mathindex_page_id = "' . $pageID + . '" AND mathindex_anchor= "' . $anchorID + . '" AND mathindex_inputhash = math_inputhash' + ); + if ($res) { + $mml = utf8_decode($res->math_mathml); + $out->addWikiText("====[[$pagename#math$anchorID|Eq: $anchorID (Result " . $this->resultID++ . ")]]====", false); + //$out->addHtml(MathLaTeXML::embedMathML($mml)); + $out->addHtml("<br />"); $xpath = $answ[0]['xpath']; - $xmml = new SimpleXMLElement( $res->math_mathml ); + $xmml = new SimpleXMLElement($res->math_mathml); + //TODO: Remove hack and report to Prode that he fixes that + //$xmml->registerXPathNamespace('m', 'http://www.w3.org/1998/Math/MathML'); + $xpath = str_replace('/m:semantics/m:annotation-xml[@encoding="MathML-Content"]','',$xpath); + if(!$wgDebugMath){ + $out->addWikiText("xPATH:".$xpath); + $out->addWikiText('MATHML:<source lang="xml">'.$xmml->asXML().'</source>'); + } $hit = $xmml->xpath( $xpath ); while ( list( , $node ) = each( $hit ) ) { - // $wgOut->addHtml(var_export($node,true)); - $wgOut->addHtml( "<math>" . utf8_decode( $node->asXML() ) . "</math>" ); + //$out->addHtml(var_export($node["xref"][0],true)); + $dom = new DOMDocument; + $dom->loadXML($mml); + $domRes=$dom->getElementById($node["xref"][0]); + $domRes->setAttribute('mathcolor', '#cc0000'); + $out->addHtml( $domRes->ownerDocument->saveXML()); } - wfDebugLog( "MathSearch", "PositionInfo:" . var_export( $this->mathResults[$pageID][$anchorID], true ) ); + wfDebugLog("MathSearch", "PositionInfo:" . var_export($this->mathResults[$pageID][$anchorID], true)); } else - wfDebugLog( "MathSearch", "Failure: Could not get entry $anchorID for page $pagename (id $pageID) :" . var_export( $this->mathResults, true ) ); + wfDebugLog("MathSearch", "Failure: Could not get entry $anchorID for page $pagename (id $pageID) :" . var_export($this->mathResults, true)); } // var_dump($answ); - // $xansw=new SimpleXMLElement($answ); // foreach($xansw->children("mws",TRUE) as $substpair){ // $substattrs=$substpair->attributes(); @@ -189,127 +165,195 @@ return true; } - /** - * @param unknown $pattern - * @param unknown $text - * @return string - */ - function searchForm( $pattern, $text ) { - $out = ''; - // The form header, which links back to this page. - $pageID = Title::makeTitle( NS_SPECIAL, 'MathSearch' ); - $action = $pageID->getLinkURL(); - $out .= "<form method=\"get\" action=\"$action\">\n"; - // The search text field. - $pattern = htmlspecialchars( $pattern ); - $out .= "<p>Search for LaTeX pattern <input type=\"text\" name=\"pattern\"" . " value=\"$pattern\" size=\"36\" /> for example \sin(a+?b) \n"; - if ( self::getLucene() ) { - $out .= "<p>Search for Text pattern <input type=\"text\" name=\"text\"" . " value=\"$text\" size=\"36\" />\n"; + + public function performSearch() { + global $wgDebugMath; + $out = $this->getOutput(); + $time_start = microtime(true); + $out->addWikiText('==Results=='); + $out->addWikiText('You serached for the LaTeX pattern "' . $this->mathpattern . '" and the text pattern "' . $this->textpattern . '".'); + if ($this->mathpattern) { + if ($this->render()) { + $out->addWikiText("Your mathpattern was suceessfully rendered!"); + if ($wgDebugMath) { + $out->addWikiText(" <source lang=\"xml\">" . $this->mathmlquery . "</source>"); + } + if ($this->postQuery()) { + $out->addWikiText("Your mathquery was sucessfully submitted and " . $this->numMathResults . " hits were obtained."); + } else { + $out->addWikiText("Failed to post query."); + } + $time_end = microtime(true); + $time = $time_end - $time_start; + wfDebugLog("MathSearch", "math searched in $time seconds"); + } else { + $out->addWikiText("Your query could not be renderded see the DebugLog for details."); + } + // $out->addHTML(var_export($this->mathResults, true)); } else { - $out .= "<p> LuceneSearch not found. Text search <b>disabled</b>!<br/> For details see <a href=\"http://www.mediawiki.org/wiki/Extension:MWSearch\">MWSearch</a>.</p>"; + $out->addWikiText('The math-pattern is empty. No math search has been performed.'); + $out->addWikiText("To view the text results click [{{canonicalurl:search|search=$this->textpattern}} Text-only search]."); } - // The search button. - $out .= "<input type=\"submit\" name=\"searchx\" value=\"Search\" /></p>\n"; - // The table of namespace checkboxes. - $out .= "<p><table><tr>\n"; - $out .= "</tr></table></p>\n"; - $out .= "</form>\n"; - return $out; - } - /** - * @param unknown $tex - * @return string|boolean - */ - function render( $tex ) { - $renderer = new MathLaTeXML( $tex ); - $renderer->setLaTeXMLSettings( 'profile=mwsquery' ); - $renderer->setAllowedRootElments( array( 'http://search.mathweb.org/ns:query' ) ); - $renderer->render( true ); - $contents = $renderer->getMathml(); - if ( strlen( $contents ) == 0 ) { - return 'ERROR unknown'; + if ($this->textpattern == "") { + $mathout = ""; + if ($this->mathResults) { + foreach ($this->mathResults as $pageID => $page) { + $article = Article::newFromId($pageID); + if ($article) { + $pagename = (string) $article->getTitle(); + $out->addWikiText("==[[$pagename]]=="); + $this->DisplayMath($pageID); + } + else + $out->addWikiText("Error with Page (ID=$pageID) update math index.\n"); + } + } } - return $this->genSerachString( $contents ); + $ls = self::getLucene(); + if ($ls) { + $ls->limit = 1000000; + if ($this->textpattern) { + $sres = $ls->searchText($textpattern); + if ($sres && $sres->hasResults()) { + $out->addWikiText("You searched for the text '$textpattern' and the TeX-Pattern '$pattern'."); + $out->addWikiText("The text search results in [{{canonicalurl:search|search=$textpattern}} " . + $sres->getTotalHits() + . "] hits and the math pattern matched $this->numMathResults times on [{{canonicalurl:{{FULLPAGENAMEE}}|pattern=$pattern}} " . + sizeof($this->relevantMathMap) . + "] pages."); + //// var_dump($sres); + wfDebugLog('mathsearch', 'BOF'); + // $out->addWikiText(var_export($this->relevantMathMap,true)); + $pageList = ""; + while ($tres = $sres->next()) { + $pageID = $tres->getTitle()->getArticleID(); + // $out->addWikiText($pageID); + + if (isset($this->relevantMathMap[$pageID])) { + $out->addWikiText("[[" . $tres->getTitle() . "]]"); + $out->addHtml($tres->getTextSnippet($textpattern)); + $pageList .= "OR [[" . $pageID . "]]"; + // $out->addHtml($this->showHit($tres),$textpattern); + $this->DisplayMath($pageID); + } /* else { + $out->addWikiText(":NO MATH"); + }// */ + } // $tres->mHighlightTitle)} + + wfDebugLog('mathsearch', 'EOF'); + wfDebugLog('mathsearch', var_export($this->mathResults, true)); + } + } + } + // $out->addHtml(htmlspecialchars( $pattern) ); + $out->addWikiText("<math> $this->mathpattern </math>"); + // dbw = wfGetDB( DB_MASTER );$dbw->encodeBlob(pack( 'H32' + // $inputhash= $dbw->encodeBlob(pack( 'H32',md5($pattern) ); + // $out->addWikiText("$inputhash"); + $dbr = wfGetDB(DB_SLAVE); + $inputhash = $dbr->encodeBlob(pack('H32', md5($this->mathpattern))); + $rpage = $dbr->select( + 'mathindex', array('mathindex_page_id', 'mathindex_anchor', 'mathindex_timestamp'), array('mathindex_inputhash' => $inputhash) + ); + foreach ($rpage as $row) + wfDebugLog("MathSearch", var_export($row, true)); } /** - * @param unknown $cmml + * Renders the math search input to mathml * @return boolean */ - function genSerachString( $cmml ) { - global $wgMWSUrl; + function render() { + $renderer = new MathLaTeXML($this->mathpattern); + $renderer->setLaTeXMLSettings('profile=mwsquery'); + $renderer->setAllowedRootElments(array('query')); + $renderer->render(true); + $this->mathmlquery = $renderer->getMathml(); + if (strlen($this->mathmlquery) == 0) { + return false; + } else { + return true; + } + } + + /** + * Posts the query to mwsd and evaluates the result data + * @return boolean + */ + function postQuery() { + global $wgMWSUrl, $wgDebugMath; $numProcess = 30000; - $tmp = str_replace( "answsize=\"30\"", "answsize=\"$numProcess\" totalreq=\"yes\"", $cmml ); - $mwsExpr = str_replace( "m:", "", $tmp ); - wfDebugLog( 'mathsearch', 'MWS query:' . $mwsExpr ); - $res = Http::post( $wgMWSUrl, array( "postData" => $mwsExpr, "timeout" => 60 ) ); - if ( $res == false ) { - if ( function_exists( 'curl_init' ) ) { + $tmp = str_replace("answsize=\"30\"", "answsize=\"$numProcess\" totalreq=\"yes\"", $this->mathmlquery); + $mwsExpr = str_replace("m:", "", $tmp); + wfDebugLog('mathsearch', 'MWS query:' . $mwsExpr); + $res = Http::post($wgMWSUrl, array("postData" => $mwsExpr, "timeout" => 60)); + if ($res == false) { + if (function_exists('curl_init')) { $handle = curl_init(); $options = array( - CURLOPT_URL => $wgMWSUrl, - CURLOPT_CUSTOMREQUEST => 'POST', // GET POST PUT PATCH DELETE HEAD OPTIONS + CURLOPT_URL => $wgMWSUrl, + CURLOPT_CUSTOMREQUEST => 'POST', // GET POST PUT PATCH DELETE HEAD OPTIONS ); - curl_setopt_array( $handle, $options ); - $details = curl_exec( $handle ); + //TODO: Figure out how not to write the error in a message and not in top of the output page + curl_setopt_array($handle, $options); + $details = curl_exec($handle); } else { $details = "curl is not installed."; } - wfDebugLog( "MathSearch", "Nothing retreived from $wgMWSUrl. Check if mwsd is running. Error:" . - var_export( $details, true ) ); + wfDebugLog("MathSearch", "Nothing retreived from $wgMWSUrl. Check if mwsd is running. Error:" . + var_export($details, true)); return false; } - $xres = new SimpleXMLElement( $res ); + $xres = new SimpleXMLElement($res); + if ($wgDebugMath) { + $out = $this->getOutput(); + $out->addWikiText('<source lang="xml">' . $res . '</source>'); + } $this->numMathResults = (int) $xres["total"]; - wfDebugLog( "MathSearch", $this->numMathResults . " results retreived from $wgMWSUrl." ); - if ( $this->numMathResults == 0 ) - return false; - - - $this->relevantMathMap = array(); $this->mathResults = array(); - $this->processMathResults( $xres ); - if ( $this->numMathResults >= $numProcess ) { - ini_set( 'memory_limit', '256M' ); - for ( $i = $numProcess; $i <= $this->numMathResults; $i += $numProcess ) { - $query = str_replace( "limitmin=\"0\" ", "limitmin=\"$i\" ", $mwsExpr ); - $res = Http::post( $wgMWSUrl, array( "postData" => $query, "timeout" => 60 ) ); - wfDebugLog( 'mathsearch', 'MWS query:' . $query ); - if ( $res == false ) { - wfDebugLog( "MathSearch", "Nothing retreived from $wgMWSUrl. check if mwsd is running there" ); + wfDebugLog("MathSearch", $this->numMathResults . " results retreived from $wgMWSUrl."); + if ($this->numMathResults == 0) + return true; + $this->relevantMathMap = array(); + $this->mathResults = array(); + $this->processMathResults($xres); + if ($this->numMathResults >= $numProcess) { + ini_set('memory_limit', '256M'); + for ($i = $numProcess; $i <= $this->numMathResults; $i += $numProcess) { + $query = str_replace("limitmin=\"0\" ", "limitmin=\"$i\" ", $mwsExpr); + $res = Http::post($wgMWSUrl, array("postData" => $query, "timeout" => 60)); + wfDebugLog('mathsearch', 'MWS query:' . $query); + if ($res == false) { + wfDebugLog("MathSearch", "Nothing retreived from $wgMWSUrl. check if mwsd is running there"); return false; } - $xres = new SimpleXMLElement( $res ); - $this->processMathResults( $xres ); + $xres = new SimpleXMLElement($res); + $this->processMathResults($xres); } } - // $this->qs = "<mws:expr> $out </mws:expr>"; - return true; - // return "$pre <mws:expr> $out </mws:expr> $post"; //$this->math_result;//"$pre <mws:expr> $out </mws:expr> $post"; } - /** * @param unknown $xmlRoot */ - function processMathResults( $xmlRoot ) - { - foreach ( $xmlRoot->children( "mws", TRUE ) as $page ) { + function processMathResults($xmlRoot) { + foreach ($xmlRoot->children("mws", TRUE) as $page) { $attrs = $page->attributes(); - $uri = explode( "#", $attrs["uri"] ); - $pageID = $uri[0]; $AnchorID = substr( $uri[1], 4 ); + $uri = explode("#", $attrs["uri"]); + $pageID = $uri[0]; + $AnchorID = substr($uri[1], 4); $this->relevantMathMap[$pageID] = true; $substarr = array(); // $this->mathResults[(string) $pageID][(string) $AnchorID][]=$page->asXML(); - foreach ( $page->children( "mws", TRUE ) as $substpair ) { + foreach ($page->children("mws", TRUE) as $substpair) { $substattrs = $substpair->attributes(); - $substarr[] = array( "qvar" => (string) $substattrs["qvar"], "xpath" => (string) $substattrs["xpath"] ); + $substarr[] = array("qvar" => (string) $substattrs["qvar"], "xpath" => (string) $substattrs["xpath"]); } - $this->mathResults[(string) $pageID][(string) $AnchorID][] = array( "xpath" => (string) $attrs["xpath"], "mappings" => $substarr );// ,"original"=>$page->asXML() - + $this->mathResults[(string) $pageID][(string) $AnchorID][] = array("xpath" => (string) $attrs["xpath"], "mappings" => $substarr); // ,"original"=>$page->asXML() } } + } diff --git a/maintenance/CreateMathIndex.php b/maintenance/CreateMathIndex.php index 3a430de..f1a569a 100644 --- a/maintenance/CreateMathIndex.php +++ b/maintenance/CreateMathIndex.php @@ -27,7 +27,7 @@ * @author Moritz Schubotz * */ -class CreateMath extends Maintenance { +class CreateMathIndex extends Maintenance { private static $mwsns = "mws:"; private static $XMLHead; private static $XMLFooter; @@ -41,7 +41,7 @@ $this->mDescription = 'Generates harvest files for the MathWebSearch Deamon.'; $this->addArg( 'dir', 'The directory where the harvest files go to.' ); $this->addArg( 'ffmax', "The maximal number of formula per file.", false ); - $this->addOption( 'mwsns', 'The namespace or mws normally "mws"', false ); + $this->addOption( 'mwsns', 'The namespace or mws normally "mws:"', false ); } /** @@ -126,5 +126,5 @@ } } -$maintClass = "CreateMath"; +$maintClass = "CreateMathIndex"; require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/MathDump.php b/maintenance/MathDump.php index 9abf1c9..e52eede 100644 --- a/maintenance/MathDump.php +++ b/maintenance/MathDump.php @@ -30,7 +30,7 @@ * Simple dump output filter to exclude all talk pages. * @ingroup Dump */ -class MathMLFilter extends DumpFilter { +class MathDump extends DumpFilter { public static function register( $backupDumper ) { $backupDumper->registerFilter( 'mathml', 'MathMLFilter' ); diff --git a/mws b/mws new file mode 160000 index 0000000..b5bff0f --- /dev/null +++ b/mws +Subproject commit b5bff0ff817fb7a58afa532cefd49b6da119f626 -- To view, visit https://gerrit.wikimedia.org/r/84481 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I16d90caf95250ccf0bba7169218da49b758dadde Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/MathSearch Gerrit-Branch: master Gerrit-Owner: Physikerwelt <w...@physikerwelt.de> Gerrit-Reviewer: Deyan <d.gi...@jacobs-university.de> Gerrit-Reviewer: Physikerwelt <w...@physikerwelt.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits