Physikerwelt has submitted this change and it was merged.

Change subject: Update to NTCIR-11 query format
......................................................................


Update to NTCIR-11 query format

* Add NTCIR-11 tex query format.
TODO: Update XML format as well.

Change-Id: I9faf5957d53813711c97ca4b9b5d025b2bc8a8f5
---
M MathQueryObject.php
M maintenance/GenerateWorkload.php
M maintenance/IndexBase.php
3 files changed, 73 insertions(+), 14 deletions(-)

Approvals:
  Physikerwelt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/MathQueryObject.php b/MathQueryObject.php
index dc06222..79938e3 100644
--- a/MathQueryObject.php
+++ b/MathQueryObject.php
@@ -11,6 +11,16 @@
        /** @var XQueryGenerator current instance of xQueryGenerator  */
        private $xQuery = false;
        private $xQueryDialect = false;
+/* ToDo: Update to new format
+<code>
+       latexmlc --whatsin=fragment --path=$(LLIB) \
+--preamble=$(LLIB)/pre.tex --postamble=$(LLIB)/post.tex \
+--format=xml --cmml --pmml --preload=[ids]latexml.sty \
+--stylesheet=$(LLIB)/ntcir11-topic.xsl \
+--destination=$@ --log=$(basename $<).ltxlog $<
+</code> see http://kwarc.info/kohlhase/event/NTCIR11/
+*/
+
        private $pmmlSettings = array('format' => 'xml',
        'whatsin' => 'math',
        'whatsout' => 'math',
@@ -48,6 +58,28 @@
                $this->queryID = $id;
        }
 
+       public function exportTexDocument(){
+               $texInput = htmlspecialchars( $this->getUserInputTex());
+               $title = Title::newFromId( $this->getPageID() );
+               $absUrl  = $title->getFullURL(array("oldid"=>$title->getLatestRevID()))."#math{$this->getAnchorID()}";
+               return <<<TeX
+\begin{topic}{{$this->getPageTitle()}-{$this->getAnchorID()}}
+  \begin{fquery}\${$this->getTeXQuery()}\$\end{fquery}
+\begin{private}
+    \begin{relevance}
+               find result similar to
+               <a href="$absUrl">
+               $texInput
+               </a>
+    \end{relevance}
+    \examplehit{{$absUrl}}
+    \contributor{Moritz Schubotz}
+\end{private}
+\end{topic}
+TeX;
+
+       }
+
        /**
         * 
         * @param ResultWrapper $rpage
@@ -76,7 +108,7 @@
         * @return string
         */
        public function getTeXQuery(){
-               if ($this->texquery === false ){
+               if ($this->texquery == false ){
                        $this->injectQvar();
                }
                return $this->texquery;
@@ -105,6 +137,10 @@
                }
                return $this->pquery;
        }
+
+       /**
+        * @return bool|string
+        */
        public function serlializeToXML(  ){
                $cx = simplexml_load_string($this->getCQuery());
                $px = simplexml_load_string($this->getPQuery());
@@ -130,7 +166,7 @@
                        . htmlspecialchars( $this->getUserInputTex()) ."</a></relevance>";
                $out.="\n</topic>\n";
                return $out;
-               }
+                           }
 
        public function injectQvar() {
                $out = "";
diff --git a/maintenance/GenerateWorkload.php b/maintenance/GenerateWorkload.php
index 8f9afbf..ef77ed9 100644
--- a/maintenance/GenerateWorkload.php
+++ b/maintenance/GenerateWorkload.php
@@ -28,20 +28,40 @@
  *
  */
 class GenerateWorkload extends IndexBase {
-       private $id =0;
-
+       private $id = 0;
+       private $selectivity = PHP_INT_MAX;
+       private $head = <<<'XML'
+<?xml version="1.0" encoding="UTF-8"?>
+<topics xmlns="http://ntcir-math.nii.ac.jp/" xmlns:m="http://www.w3.org/1998/Math/MathML" xml:id="Document">
+XML;
+       private $footer=<<<'XML'
+</topics>
+XML;
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription = 'Generates a workload of sample queries.';
+               $this->addOption( 'format', "Specifies the output format. Valid options (XML|tex).", false, true, "F" );
+               $this->addOption( 'selectivity' , "Specifies the selectivity for each individual equation", false, true, "S");
+       }
        /**
         * @param ResultWrapper $row
         * @return string
         */
        protected function generateIndexString( $row ){
-               $q = MathQueryObject::newQueryFromEquationRow($row, ++$this->id );
-               $out = $q->serlializeToXML();
-               if( $out == false ){
-                       echo 'problem with '.var_export($q,true)."\n";
-                       $out = '';
-               }
-               return $out;
+               if ( mt_rand() <= $this->selectivity ){
+                       $q = MathQueryObject::newQueryFromEquationRow($row, ++$this->id );
+                       $format = $this->getOption( "format", "XML" );
+                       if( $format == "tex" ){
+                               $out = $q->exportTexDocument();
+                       } else {
+                               $out = $q->serlializeToXML();
+                       }
+                       if( $out == false ){
+                               echo 'problem with '.var_export($q,true)."\n";
+                               $out = '';
+                       }
+                       return $out;
+               } else return '';
        }
 
 
@@ -49,6 +69,7 @@
                libxml_use_internal_errors( true );
                $i = 0;
                $inc = $this->getArg( 1, 100 );
+               $this->selectivity = (int) ($this->getOption( "selectivity", 1 ) * mt_getrandmax()) ;
                $db = wfGetDB( DB_SLAVE );
                echo "getting list of all equations from the database\n";
                $this->res = $db->select(
@@ -69,10 +90,12 @@
        }
 
        protected function getHead(){
-               return '<?xml version="1.0" encoding="UTF-8"?>'.PHP_EOL;
+               if ($this->getOption( "format", "XML" ) == "XML" )
+                       return $this->head;
        }
        protected function getFooter(){
-               return "";
+               if ($this->getOption( "format", "XML" ) == "XML" )
+                       return $this->footer;
        }
 }
 $maintClass = "GenerateWorkload";
diff --git a/maintenance/IndexBase.php b/maintenance/IndexBase.php
index b5473c5..81e2864 100644
--- a/maintenance/IndexBase.php
+++ b/maintenance/IndexBase.php
@@ -40,7 +40,7 @@
                $this->addArg( 'ffmax', "The maximal number of formula per file.", false );
                $this->addArg( 'min', "If set processing is started at the page with rank(pageID)>min", false );
                $this->addArg( 'max', "If set processing is stopped at the page with rank(pageID)<=max", false );
-               $this->addOption( 'limit', 'The maximal number of database entries to be considered', false );
+               $this->addOption( 'limit', 'The maximal number of database entries to be considered', false ,true , "L");
        }
 
        /**

-- 
To view, visit https://gerrit.wikimedia.org/r/133118
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I9faf5957d53813711c97ca4b9b5d025b2bc8a8f5
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/MathSearch
Gerrit-Branch: master
Gerrit-Owner: Physikerwelt <w...@physikerwelt.de>
Gerrit-Reviewer: L10n-bot <l10n-...@translatewiki.net>
Gerrit-Reviewer: Physikerwelt <w...@physikerwelt.de>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to