Physikerwelt has submitted this change and it was merged. Change subject: Prepare for DB2 integration ......................................................................
Prepare for DB2 integration * new concept of MathEngine in backend * separate frontend and backend to prepare for performance benchmark Change-Id: I4f967a91fd409fea7d55a4a0fa3912aac958c222 --- A MathEngineMws.php M MathObject.php M MathQueryObject.php M MathSearch.php M SpecialMathSearch.php 5 files changed, 237 insertions(+), 114 deletions(-) Approvals: Physikerwelt: Verified; Looks good to me, approved diff --git a/MathEngineMws.php b/MathEngineMws.php new file mode 100644 index 0000000..d6ed303 --- /dev/null +++ b/MathEngineMws.php @@ -0,0 +1,123 @@ +<?php + +/** + * MediaWiki MathSearch extension + * + * (c) 2014 Moritz Schubotz + * GPLv2 license; info in main package. + * + * @file + * @ingroup extensions + */ +class MathEngineMws { + /** @var MathQueryObject the query to be answered*/ + protected $query; + protected $size = false; + protected $resultSet; + protected $relevanceMap; + /** + * + * @return MathQueryObject + */ + public function getQuery() { + return $this->query; + } + function __construct(MathQueryObject $query) { + $this->query = $query; + } + public function getSize() { + return $this->size; + } + + public function getResultSet() { + return $this->resultSet; + } + + public function getRelevanceMap() { + return $this->relevanceMap; + } + + /** + * + * @param MathQueryObject $query + * @return \MathSearchEngine + */ + public function setQuery(MathQueryObject $query) { + $this->query = $query; + return $this; + } + + /** + * Posts the query to mwsd and evaluates the result data + * @return boolean + */ + function postQuery() { + global $wgMWSUrl, $wgMathDebug; + + $numProcess = 30000; + $tmp = str_replace( "answsize=\"30\"", "answsize=\"$numProcess\" totalreq=\"yes\"", $this->getQuery()->getCQuery() ); + $mwsExpr = str_replace( "m:", "", $tmp ); + wfDebugLog( 'mathsearch', 'MWS query:' . 
$mwsExpr ); + $res = Http::post( $wgMWSUrl, array( "postData" => $mwsExpr, "timeout" => 60 ) ); + if ( $res == false ) { + if ( function_exists( 'curl_init' ) ) { + $handle = curl_init(); + $options = array( + CURLOPT_URL => $wgMWSUrl, + CURLOPT_CUSTOMREQUEST => 'POST', // GET POST PUT PATCH DELETE HEAD OPTIONS + ); + // TODO: Figure out how not to write the error in a message and not in top of the output page + curl_setopt_array( $handle, $options ); + $details = curl_exec( $handle ); + } else { + $details = "curl is not installed."; + } + wfDebugLog( "MathSearch", "Nothing retreived from $wgMWSUrl. Check if mwsd is running. Error:" . + var_export( $details, true ) ); + return false; + } + $xres = new SimpleXMLElement( $res ); + $this->size = (int) $xres["total"]; + wfDebugLog( "MathSearch", $this->size . " results retreived from $wgMWSUrl." ); + if ($this->size == 0) { + return true; + } + $this->relevanceMap = array(); + $this->resultSet = array(); + $this->processMathResults( $xres ); + if ( $this->size >= $numProcess ) { + ini_set( 'memory_limit', '256M' ); + for ( $i = $numProcess; $i <= $this->size; $i += $numProcess ) { + $query = str_replace( "limitmin=\"0\" ", "limitmin=\"$i\" ", $mwsExpr ); + $res = Http::post( $wgMWSUrl, array( "postData" => $query, "timeout" => 60 ) ); + wfDebugLog( 'mathsearch', 'MWS query:' . $query ); + if ( $res == false ) { + wfDebugLog( "MathSearch", "Nothing retreived from $wgMWSUrl. 
check if mwsd is running there" ); + return false; + } + $xres = new SimpleXMLElement( $res ); + $this->processMathResults( $xres ); + } + } + return true; + } + /** + * @param unknown $xmlRoot + */ + function processMathResults( $xmlRoot ) { + foreach ( $xmlRoot->children( "mws", TRUE ) as $page ) { + $attrs = $page->attributes(); + $uri = explode( "#", $attrs["uri"] ); + $pageID = $uri[0]; + $AnchorID = substr( $uri[1], 4 ); + $this->relevanceMap[$pageID] = true; + $substarr = array(); + // $this->mathResults[(string) $pageID][(string) $AnchorID][]=$page->asXML(); + foreach ( $page->children( "mws", TRUE ) as $substpair ) { + $substattrs = $substpair->attributes(); + $substarr[] = array( "qvar" => (string) $substattrs["qvar"], "xpath" => (string) $substattrs["xpath"] ); + } + $this->resultSet[(string) $pageID][(string) $AnchorID][] = array( "xpath" => (string) $attrs["xpath"], "mappings" => $substarr ); // ,"original"=>$page->asXML() + } + } +} diff --git a/MathObject.php b/MathObject.php index a4bcc09..a078c74 100644 --- a/MathObject.php +++ b/MathObject.php @@ -146,6 +146,12 @@ } } + /** + * + * @param int $pid + * @param int $eid + * @return self instance + */ public static function constructformpage( $pid, $eid ) { $dbr = wfGetDB( DB_SLAVE ); $res = $dbr->selectRow( diff --git a/MathQueryObject.php b/MathQueryObject.php index 40deeac..59c8d8a 100644 --- a/MathQueryObject.php +++ b/MathQueryObject.php @@ -167,9 +167,24 @@ return $out; } + public function getLaTeXMLCMMLSettings(){ + global $wgMathDefaultLaTeXMLSetting; + $cSettings = $wgMathDefaultLaTeXMLSetting; + $cSettings['preload'][] = 'mws.sty'; + $cSettings['stylesheet'] = 'MWSquery.xsl'; + return $cSettings; + } + + public function getLaTeXMLPMLSettings(){ + global $wgMathDefaultLaTeXMLSetting; + $cSettings = array_diff($wgMathDefaultLaTeXMLSetting, array('cmml')); + $cSettings['preload'][] = 'mws.sty'; + $cSettings['stylesheet'] = 'MWSquery.xsl'; + return $cSettings; + } public function 
generateContentQueryString(){ $renderer = new MathLaTeXML($this->getTexQuery()); - $renderer->setLaTeXMLSettings('profile=mwsquery'); + $renderer->setLaTeXMLSettings($this->getLaTeXMLCMMLSettings()); $renderer->setAllowedRootElments(array('query')); $renderer->render(true); $this->cquery = $renderer->getMathml(); @@ -221,5 +236,63 @@ $xQueryGenertor = $this->setXQueryGenerator(); return $xQueryGenertor->getXQuery(); } - + + /** + * Posts the query to mwsd and evaluates the result data + * @return boolean + */ + function postQuery() { + global $wgMWSUrl, $wgMathDebug; + + $numProcess = 30000; + $tmp = str_replace( "answsize=\"30\"", "answsize=\"$numProcess\" totalreq=\"yes\"", $this->getCQuery() ); + $mwsExpr = str_replace( "m:", "", $tmp ); + wfDebugLog( 'mathsearch', 'MWS query:' . $mwsExpr ); + $res = Http::post( $wgMWSUrl, array( "postData" => $mwsExpr, "timeout" => 60 ) ); + if ( $res == false ) { + if ( function_exists( 'curl_init' ) ) { + $handle = curl_init(); + $options = array( + CURLOPT_URL => $wgMWSUrl, + CURLOPT_CUSTOMREQUEST => 'POST', // GET POST PUT PATCH DELETE HEAD OPTIONS + ); + // TODO: Figure out how not to write the error in a message and not in top of the output page + curl_setopt_array( $handle, $options ); + $details = curl_exec( $handle ); + } else { + $details = "curl is not installed."; + } + wfDebugLog( "MathSearch", "Nothing retreived from $wgMWSUrl. Check if mwsd is running. Error:" . + var_export( $details, true ) ); + return false; + } + $xres = new SimpleXMLElement( $res ); + if ( $wgMathDebug ) { + $out = $this->getOutput(); + $out->addWikiText( '<source lang="xml">' . $res . '</source>' ); + } + $this->numMathResults = (int) $xres["total"]; + wfDebugLog( "MathSearch", $this->numMathResults . " results retreived from $wgMWSUrl." 
); + if ( $this->numMathResults == 0 ) + return true; + $this->relevantMathMap = array(); + $this->mathResults = array(); + $this->processMathResults( $xres ); + if ( $this->numMathResults >= $numProcess ) { + ini_set( 'memory_limit', '256M' ); + for ( $i = $numProcess; $i <= $this->numMathResults; $i += $numProcess ) { + $query = str_replace( "limitmin=\"0\" ", "limitmin=\"$i\" ", $mwsExpr ); + $res = Http::post( $wgMWSUrl, array( "postData" => $query, "timeout" => 60 ) ); + wfDebugLog( 'mathsearch', 'MWS query:' . $query ); + if ( $res == false ) { + wfDebugLog( "MathSearch", "Nothing retreived from $wgMWSUrl. check if mwsd is running there" ); + return false; + } + $xres = new SimpleXMLElement( $res ); + $this->processMathResults( $xres ); + } + } + return true; + } + } diff --git a/MathSearch.php b/MathSearch.php index 854adb6..4afc702 100644 --- a/MathSearch.php +++ b/MathSearch.php @@ -42,7 +42,7 @@ $wgAutoloadClasses['GetEquationsByQuery'] = $dir . 'GetEquationsByQuery.php'; $wgAutoloadClasses['SpecialMathDebug'] = $dir . 'SpecialMathDebug.php'; $wgAutoloadClasses['SpecialMathIndex'] = $dir . 'SpecialMathIndex.php'; - +$wgAutoloadClasses['MathEngineMws'] = $dir . 'MathEngineMws.php'; $wgExtensionMessagesFiles['MathSearch'] = $dir . 'MathSearch.i18n.php'; $wgExtensionMessagesFiles['MathSearchAlias'] = $dir . 
'MathSearch.alias.php'; diff --git a/SpecialMathSearch.php b/SpecialMathSearch.php index a6ca61a..9151e23 100644 --- a/SpecialMathSearch.php +++ b/SpecialMathSearch.php @@ -15,8 +15,6 @@ var $qs; var $math_result; var $mathSearchExpr; - var $relevantMathMap; - var $numMathResults; var $numTextResults; var $mathResults; var $mathpattern; @@ -116,45 +114,45 @@ function DisplayMath( $pageID ) { global $wgMathDebug; $out = $this->getOutput(); - $page = $this->mathResults[(string) $pageID]; + $resultes = $this->mathBackend->getResultSet(); + $page = $resultes[(string) $pageID]; $dbr = wfGetDB( DB_SLAVE ); $article = Article::newFromId( $pageID ); $pagename = (string) $article->getTitle(); wfDebugLog( "MathSearch", "Processing results for $pagename" ); foreach ( $page as $anchorID => $answ ) { - $res = $dbr->selectRow( - array( 'mathindex', 'math' ), array( 'math_mathml', 'mathindex_page_id', 'mathindex_anchor', - 'mathindex_inputhash', 'math_inputhash' ), 'mathindex_page_id = "' . $pageID - . '" AND mathindex_anchor= "' . $anchorID - . '" AND mathindex_inputhash = math_inputhash' - ); - if ( $res ) { - $mml = utf8_decode( $res->math_mathml ); + $res = MathObject::constructformpage($pageID, $anchorID); + $mml = $res->getMathml(); + if ( $mml ) { $out->addWikiText( "====[[$pagename#math$anchorID|Eq: $anchorID (Result " . $this->resultID++ . ")]]====", false ); // $out->addHtml(MathLaTeXML::embedMathML($mml)); $out->addHtml( "<br />" ); $xpath = $answ[0]['xpath']; - $xmml = new SimpleXMLElement( $res->math_mathml ); // TODO: Remove hack and report to Prode that he fixes that // $xmml->registerXPathNamespace('m', 'http://www.w3.org/1998/Math/MathML'); $xpath = str_replace( '/m:semantics/m:annotation-xml[@encoding="MathML-Content"]', '', $xpath ); - if ( !$wgMathDebug ) { - $out->addWikiText( "xPATH:" . $xpath ); - $out->addWikiText( 'MATHML:<source lang="xml">' . $xmml->asXML() . 
'</source>' ); + $dom = new DOMDocument; + $dom->loadXML( $mml ); + $DOMx = new DOMXpath($dom); + $hits = $DOMx->query($xpath); + if ( $wgMathDebug ) { + wfDebugLog('MathSearch', "xPATH:" . $xpath); } - $hit = $xmml->xpath( $xpath ); - while ( list( , $node ) = each( $hit ) ) { - // $out->addHtml(var_export($node["xref"][0],true)); - $dom = new DOMDocument; - $dom->loadXML( $mml ); - $domRes = $dom->getElementById( $node["xref"][0] ); - if ( $domRes ) { - $domRes->setAttribute( 'mathcolor', '#cc0000' ); - $out->addHtml( $domRes->ownerDocument->saveXML() ); - } else { - $renderer = new MathMathML(); - $renderer->setMathml( $mml ); - $out->addHtml( $renderer->getHtmlOutput() ); + //$hit = $xmml->xpath( $xpath ); + if (!is_null($hits)) { + foreach ($hits as $node) { + /* @var $node */ + //$node->item($index)->attributes->getNamedItem($name)->nodeValue + // $out->addHtml(var_export($node["xref"][0],true)); + $domRes = $dom->getElementById( $node->attributes->getNamedItem('xref')->nodeValue ); + if ( $domRes ) { + $domRes->setAttribute( 'mathcolor', '#cc0000' ); + $out->addHtml( $domRes->ownerDocument->saveXML() ); + } else { + $renderer = new MathMathML(); + $renderer->setMathml( $mml ); + $out->addHtml( $renderer->getHtmlOutput() ); + } } } @@ -209,8 +207,9 @@ $this->printSource( $query->getCQuery() ); } } - if ( $this->postQuery() ) { - $out->addWikiText( "Your mathquery was sucessfully submitted and " . $this->numMathResults . " hits were obtained." ); + $this->mathBackend = new MathEngineMws($query); + if ( $this->mathBackend->postQuery() ) { + $out->addWikiText( "Your mathquery was sucessfully submitted and " . $this->mathBackend->getSize() . " hits were obtained." ); } else { $out->addWikiText( "Failed to post query." 
); } @@ -228,8 +227,9 @@ if ( $this->textpattern == "" ) { $mathout = ""; - if ( $this->mathResults ) { - foreach ( $this->mathResults as $pageID => $page ) { + $results = $this->mathBackend->getResultSet(); + if ( $results ) { + foreach ( $results as $pageID => $page ) { $article = Article::newFromId( $pageID ); if ( $article ) { $pagename = (string) $article->getTitle(); @@ -310,83 +310,4 @@ return true; } } - - /** - * Posts the query to mwsd and evaluates the result data - * @return boolean - */ - function postQuery() { - global $wgMWSUrl, $wgMathDebug; - - $numProcess = 30000; - $tmp = str_replace( "answsize=\"30\"", "answsize=\"$numProcess\" totalreq=\"yes\"", $this->mathmlquery ); - $mwsExpr = str_replace( "m:", "", $tmp ); - wfDebugLog( 'mathsearch', 'MWS query:' . $mwsExpr ); - $res = Http::post( $wgMWSUrl, array( "postData" => $mwsExpr, "timeout" => 60 ) ); - if ( $res == false ) { - if ( function_exists( 'curl_init' ) ) { - $handle = curl_init(); - $options = array( - CURLOPT_URL => $wgMWSUrl, - CURLOPT_CUSTOMREQUEST => 'POST', // GET POST PUT PATCH DELETE HEAD OPTIONS - ); - // TODO: Figure out how not to write the error in a message and not in top of the output page - curl_setopt_array( $handle, $options ); - $details = curl_exec( $handle ); - } else { - $details = "curl is not installed."; - } - wfDebugLog( "MathSearch", "Nothing retreived from $wgMWSUrl. Check if mwsd is running. Error:" . - var_export( $details, true ) ); - return false; - } - $xres = new SimpleXMLElement( $res ); - if ( $wgMathDebug ) { - $out = $this->getOutput(); - $out->addWikiText( '<source lang="xml">' . $res . '</source>' ); - } - $this->numMathResults = (int) $xres["total"]; - wfDebugLog( "MathSearch", $this->numMathResults . " results retreived from $wgMWSUrl." 
); - if ( $this->numMathResults == 0 ) - return true; - $this->relevantMathMap = array(); - $this->mathResults = array(); - $this->processMathResults( $xres ); - if ( $this->numMathResults >= $numProcess ) { - ini_set( 'memory_limit', '256M' ); - for ( $i = $numProcess; $i <= $this->numMathResults; $i += $numProcess ) { - $query = str_replace( "limitmin=\"0\" ", "limitmin=\"$i\" ", $mwsExpr ); - $res = Http::post( $wgMWSUrl, array( "postData" => $query, "timeout" => 60 ) ); - wfDebugLog( 'mathsearch', 'MWS query:' . $query ); - if ( $res == false ) { - wfDebugLog( "MathSearch", "Nothing retreived from $wgMWSUrl. check if mwsd is running there" ); - return false; - } - $xres = new SimpleXMLElement( $res ); - $this->processMathResults( $xres ); - } - } - return true; - } - - /** - * @param unknown $xmlRoot - */ - function processMathResults( $xmlRoot ) { - foreach ( $xmlRoot->children( "mws", TRUE ) as $page ) { - $attrs = $page->attributes(); - $uri = explode( "#", $attrs["uri"] ); - $pageID = $uri[0]; - $AnchorID = substr( $uri[1], 4 ); - $this->relevantMathMap[$pageID] = true; - $substarr = array(); - // $this->mathResults[(string) $pageID][(string) $AnchorID][]=$page->asXML(); - foreach ( $page->children( "mws", TRUE ) as $substpair ) { - $substattrs = $substpair->attributes(); - $substarr[] = array( "qvar" => (string) $substattrs["qvar"], "xpath" => (string) $substattrs["xpath"] ); - } - $this->mathResults[(string) $pageID][(string) $AnchorID][] = array( "xpath" => (string) $attrs["xpath"], "mappings" => $substarr ); // ,"original"=>$page->asXML() - } - } - } -- To view, visit https://gerrit.wikimedia.org/r/110704 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I4f967a91fd409fea7d55a4a0fa3912aac958c222 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/MathSearch Gerrit-Branch: master Gerrit-Owner: Physikerwelt <w...@physikerwelt.de> Gerrit-Reviewer: Physikerwelt <w...@physikerwelt.de> 
_______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits