Physikerwelt has submitted this change and it was merged. Change subject: Update to NTCIR-11 query format ......................................................................
Update to NTCIR-11 query format * Add NTCIR-11 tex query format. TODO: Update XML format as well. Change-Id: I9faf5957d53813711c97ca4b9b5d025b2bc8a8f5 --- M MathQueryObject.php M maintenance/GenerateWorkload.php M maintenance/IndexBase.php 3 files changed, 73 insertions(+), 14 deletions(-) Approvals: Physikerwelt: Looks good to me, approved jenkins-bot: Verified diff --git a/MathQueryObject.php b/MathQueryObject.php index dc06222..79938e3 100644 --- a/MathQueryObject.php +++ b/MathQueryObject.php @@ -11,6 +11,16 @@ /** @var XQueryGenerator current instance of xQueryGenerator */ private $xQuery = false; private $xQueryDialect = false; +/* ToDo: Update to new format +<code> + latexmlc --whatsin=fragment --path=$(LLIB) \ +--preamble=$(LLIB)/pre.tex --postamble=$(LLIB)/post.tex \ +--format=xml --cmml --pmml --preload=[ids]latexml.sty \ +--stylesheet=$(LLIB)/ntcir11-topic.xsl \ +--destination=$@ --log=$(basename $<).ltxlog $< +</code> see http://kwarc.info/kohlhase/event/NTCIR11/ +*/ + private $pmmlSettings = array('format' => 'xml', 'whatsin' => 'math', 'whatsout' => 'math', @@ -48,6 +58,28 @@ $this->queryID = $id; } + public function exportTexDocument(){ + $texInput = htmlspecialchars( $this->getUserInputTex()); + $title = Title::newFromId( $this->getPageID() ); + $absUrl = $title->getFullURL(array("oldid"=>$title->getLatestRevID()))."#math{$this->getAnchorID()}"; + return <<<TeX +\begin{topic}{{$this->getPageTitle()}-{$this->getAnchorID()}} + \begin{fquery}\${$this->getTeXQuery()}\$\end{fquery} +\begin{private} + \begin{relevance} + find result similar to + <a href="$absUrl"> + $texInput + </a> + \end{relevance} + \examplehit{{$absUrl}} + \contributor{Moritz Schubotz} +\end{private} +\end{topic} +TeX; + + } + /** * * @param ResultWrapper $rpage @@ -76,7 +108,7 @@ * @return string */ public function getTeXQuery(){ - if ($this->texquery === false ){ + if ($this->texquery == false ){ $this->injectQvar(); } return $this->texquery; @@ -105,6 +137,10 @@ } return $this->pquery; } + + /** + * @return bool|string + */ public function serlializeToXML( ){ $cx = simplexml_load_string($this->getCQuery()); $px = simplexml_load_string($this->getPQuery()); @@ -130,7 +166,7 @@ . htmlspecialchars( $this->getUserInputTex()) ."</a></relevance>"; $out.="\n</topic>\n"; return $out; - } + } public function injectQvar() { $out = ""; diff --git a/maintenance/GenerateWorkload.php b/maintenance/GenerateWorkload.php index 8f9afbf..ef77ed9 100644 --- a/maintenance/GenerateWorkload.php +++ b/maintenance/GenerateWorkload.php @@ -28,20 +28,40 @@ * */ class GenerateWorkload extends IndexBase { - private $id =0; - + private $id = 0; + private $selectivity = PHP_INT_MAX; + private $head = <<<'XML' +<?xml version="1.0" encoding="UTF-8"?> +<topics xmlns="http://ntcir-math.nii.ac.jp/" xmlns:m="http://www.w3.org/1998/Math/MathML" xml:id="Document"> +XML; + private $footer=<<<'XML' +</topics> +XML; + public function __construct() { + parent::__construct(); + $this->mDescription = 'Generates a workload of sample queries.'; + $this->addOption( 'format', "Specifies the output format. Valid options (XML|tex).", false, true, "F" ); + $this->addOption( 'selectivity' , "Specifies the selectivity for each individual equation", false, true, "S"); + } /** * @param ResultWrapper $row * @return string */ protected function generateIndexString( $row ){ - $q = MathQueryObject::newQueryFromEquationRow($row, ++$this->id ); - $out = $q->serlializeToXML(); - if( $out == false ){ - echo 'problem with '.var_export($q,true)."\n"; - $out = ''; - } - return $out; + if ( mt_rand() <= $this->selectivity ){ + $q = MathQueryObject::newQueryFromEquationRow($row, ++$this->id ); + $format = $this->getOption( "format", "XML" ); + if( $format == "tex" ){ + $out = $q->exportTexDocument(); + } else { + $out = $q->serlializeToXML(); + } + if( $out == false ){ + echo 'problem with '.var_export($q,true)."\n"; + $out = ''; + } + return $out; + } else return ''; } @@ -49,6 +69,7 @@ libxml_use_internal_errors( true ); $i = 0; $inc = $this->getArg( 1, 100 ); + $this->selectivity = (int) ($this->getOption( "selectivity", 1 ) * mt_getrandmax()) ; $db = wfGetDB( DB_SLAVE ); echo "getting list of all equations from the database\n"; $this->res = $db->select( @@ -69,10 +90,12 @@ } protected function getHead(){ - return '<?xml version="1.0" encoding="UTF-8"?>'.PHP_EOL; + if ($this->getOption( "format", "XML" ) == "XML" ) + return $this->head; } protected function getFooter(){ - return ""; + if ($this->getOption( "format", "XML" ) == "XML" ) + return $this->footer; } } $maintClass = "GenerateWorkload"; diff --git a/maintenance/IndexBase.php b/maintenance/IndexBase.php index b5473c5..81e2864 100644 --- a/maintenance/IndexBase.php +++ b/maintenance/IndexBase.php @@ -40,7 +40,7 @@ $this->addArg( 'ffmax', "The maximal number of formula per file.", false ); $this->addArg( 'min', "If set processing is started at the page with rank(pageID)>min", false ); $this->addArg( 'max', "If set processing is stopped at the page with rank(pageID)<=max", false ); - $this->addOption( 'limit', 'The maximal number of database entries to be considered', false ); + $this->addOption( 'limit', 'The maximal number of database entries to be considered', false ,true , "L"); } /** -- To view, visit https://gerrit.wikimedia.org/r/133118 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I9faf5957d53813711c97ca4b9b5d025b2bc8a8f5 Gerrit-PatchSet: 5 Gerrit-Project: mediawiki/extensions/MathSearch Gerrit-Branch: master Gerrit-Owner: Physikerwelt <w...@physikerwelt.de> Gerrit-Reviewer: L10n-bot <l10n-...@translatewiki.net> Gerrit-Reviewer: Physikerwelt <w...@physikerwelt.de> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits