Physikerwelt has submitted this change and it was merged. Change subject: Identification of identifiers ......................................................................
Identification of identifiers Change-Id: I2400975895fca5d61babe8703a237d5038f156dc --- M MathObject.php M MathSearch.hooks.php A db/mathsemantics.sql R db/snippets/CosProd.sql A db/snippets/createSentenceHash.sql R db/snippets/getNorm.sql A db/snippets/mathidentifier.view.sql A db/snippets/most_common_meanings.sql A runTestsOnVagrant 9 files changed, 74 insertions(+), 20 deletions(-) Approvals: Physikerwelt: Verified; Looks good to me, approved diff --git a/MathObject.php b/MathObject.php index a078c74..6034c95 100644 --- a/MathObject.php +++ b/MathObject.php @@ -72,7 +72,7 @@ $dbr = wfGetDB( DB_SLAVE ); try { $res = $dbr->select( 'mathpagesimilarity', array( 'pagesimilarity_A as A', 'pagesimilarity_B as B', 'pagesimilarity_Value as V' ), "pagesimilarity_A=$pid OR pagesimilarity_B=$pid", __METHOD__, array( - "ORDER BY" => 'V DESC', "LIMIT" => 10 ) + "ORDER BY" => 'V DESC', "LIMIT" => 10 ) ); foreach ( $res as $row ) { if ( $row->A == $pid ) { @@ -82,7 +82,7 @@ } $article = WikiPage::newFromId( $other ); $out .= '# [[' . $article->getTitle() . ']] similarity ' . - $row->V * 100 . "%\n"; + $row->V * 100 . "%\n"; // .' ( pageid'.$other.'/'.$row->A.')' ); } $wgOut->addWikiText( $out ); @@ -96,18 +96,18 @@ $dbr = wfGetDB( DB_SLAVE ); try { $res = $dbr->select( array( "mathobservation", "mathvarstat", 'mathpagestat' ) - , array( "mathobservation_featurename", "mathobservation_featuretype", 'varstat_featurecount', - 'pagestat_featurecount', "count(*) as localcnt" ), array( "mathobservation_inputhash" => $this->getInputHash(), - 'varstat_featurename = mathobservation_featurename', - 'varstat_featuretype = mathobservation_featuretype', - 'pagestat_pageid' => $this->getPageID(), - 'pagestat_featureid = varstat_id' - ) - , __METHOD__, array( 'GROUP BY' => 'mathobservation_featurename', - 'ORDER BY' => 'varstat_featurecount' ) + , array( "mathobservation_featurename", "mathobservation_featuretype", 'varstat_featurecount', + 'pagestat_featurecount', "count(*) as localcnt" ), array( "mathobservation_inputhash" => $this->getInputHash(), + 'varstat_featurename = mathobservation_featurename', + 'varstat_featuretype = mathobservation_featuretype', + 'pagestat_pageid' => $this->getPageID(), + 'pagestat_featureid = varstat_id' + ) + , __METHOD__, array( 'GROUP BY' => 'mathobservation_featurename', + 'ORDER BY' => 'varstat_featurecount' ) ); } catch ( Exception $e ) { - return "Databaseproblem"; + return "Database problem"; } $wgOut->addWikiText($res->numRows(). 'results'); if ($res->numRows() == 0){ @@ -117,10 +117,31 @@ if ( $res ) { foreach ( $res as $row ) { $wgOut->addWikiText( '*' . $row->mathobservation_featuretype . ' <code>' . - utf8_decode( $row->mathobservation_featurename ) . '</code> (' . $row->localcnt . '/' - . $row->pagestat_featurecount . "/" . $row->varstat_featurecount . ')' ); + utf8_decode( $row->mathobservation_featurename ) . '</code> (' . $row->localcnt . '/' + . $row->pagestat_featurecount . "/" . $row->varstat_featurecount . ')' ); + $identifiers = $this->getNouns(utf8_decode( $row->mathobservation_featurename )) ; + if ( $identifiers ){ + foreach($identifiers as $identifier){ + $wgOut->addWikiText('**'.$identifier->noun .'('.$identifier->evidence.')'); + } + } else { + $wgOut->addWikiText('** not found'); + } } } + } + public function getNouns($identifier){ + $dbr = wfGetDB( DB_SLAVE ); + $article = Article::newFromId( $this->pageID ); + $pagename = (string)$article->getTitle();; + $identifiers = $dbr->select('math_identifier', + array( 'noun', 'evidence', 'sentence' ), + array( 'pageTitle' => $pagename, 'identifier' => $identifier), + __METHOD__ , + array('ORDER BY' => 'evidence DESC', 'LIMIT' => 5) + ); + + } public function updateObservations( $dbw = null ) { @@ -147,7 +168,7 @@ } /** - * + * * @param int $pid * @param int $eid * @return self instance @@ -155,8 +176,8 @@ public static function constructformpage( $pid, $eid ) { $dbr = wfGetDB( DB_SLAVE ); $res = $dbr->selectRow( - array( 'mathindex' ), self::dbIndexFieldsArray(), 'mathindex_page_id = ' . $pid - . ' AND mathindex_anchor= ' . $eid + array( 'mathindex' ), self::dbIndexFieldsArray(), 'mathindex_page_id = ' . $pid + . ' AND mathindex_anchor= ' . $eid ); //self::DebugPrint( var_export( $res, true ) ); $start = microtime(true); @@ -173,7 +194,7 @@ $out = array( ); $dbr = wfGetDB( DB_SLAVE ); $res = $dbr->select( - 'mathindex', self::dbIndexFieldsArray(), array( 'mathindex_inputhash' => $this->getInputHash() ) + 'mathindex', self::dbIndexFieldsArray(), array( 'mathindex_inputhash' => $this->getInputHash() ) ); foreach ( $res as $row ) { @@ -197,7 +218,7 @@ $wgOut->addHtml( " " ); $pageString = $hidePage ? "" : $this->getPageTitle() . " "; $wgOut->addWikiText( "[[" . $this->getPageTitle() . "#math" . $this->getAnchorID() - . "|" . $pageString . "Eq: " . $this->getAnchorID() . "]] ", false ); + . "|" . $pageString . "Eq: " . $this->getAnchorID() . "]] ", false ); // $wgOut->addHtml( MathLaTeXML::embedMathML( $this->mathml ) ); $wgOut->addHtml( "<br />" ); } diff --git a/MathSearch.hooks.php b/MathSearch.hooks.php index a59a40a..e56501a 100644 --- a/MathSearch.hooks.php +++ b/MathSearch.hooks.php @@ -33,6 +33,7 @@ $updater->addExtensionTable( 'mathobservation', $dir . 'mathobservation.sql' ); $updater->addExtensionTable( 'mathvarstat', $dir . 'mathvarstat.sql' ); $updater->addExtensionTable( 'mathpagestat', $dir . 'mathpagestat.sql' ); + $updater->addExtensionTable( 'mathsemantics', $dir . 'mathsemantics.sql' ); } else { //throw new MWException( "Math extension does not currently support $type database." ); } @@ -111,4 +112,4 @@ $files = array_merge( $files, glob( "$testDir/*Test.php" ) ); return true; } -} \ No newline at end of file +} diff --git a/db/mathsemantics.sql b/db/mathsemantics.sql new file mode 100644 index 0000000..3c0dcc2 --- /dev/null +++ b/db/mathsemantics.sql @@ -0,0 +1,8 @@ +CREATE TABLE `mathsemantics` ( + `pageId` int(5) NOT NULL, + `identifier` varchar(4) NOT NULL, + `evidence` double NOT NULL, + `noun` varchar(20) NOT NULL, + `sentence` text NOT NULL, + KEY `pageId` (`pageId`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8; diff --git a/db/CosProd.sql b/db/snippets/CosProd.sql similarity index 100% rename from db/CosProd.sql rename to db/snippets/CosProd.sql diff --git a/db/snippets/createSentenceHash.sql b/db/snippets/createSentenceHash.sql new file mode 100644 index 0000000..50d2af6 --- /dev/null +++ b/db/snippets/createSentenceHash.sql @@ -0,0 +1,5 @@ +UPDATE math_semantics +SET + sentenceHash = MD5(sentence) +WHERE + sentenceHash is null \ No newline at end of file diff --git a/db/getNorm.sql b/db/snippets/getNorm.sql similarity index 100% rename from db/getNorm.sql rename to db/snippets/getNorm.sql diff --git a/db/snippets/mathidentifier.view.sql b/db/snippets/mathidentifier.view.sql new file mode 100644 index 0000000..7300777 --- /dev/null +++ b/db/snippets/mathidentifier.view.sql @@ -0,0 +1,16 @@ +CREATE + ALGORITHM = UNDEFINED + DEFINER = `root`@`localhost` + SQL SECURITY DEFINER +VIEW `math_identifier` AS + select + `S`.`identifier` AS `identifier`, + `S`.`noun` AS `noun`, + `S`.`evidence` AS `evidence`, + `S`.`sentence` AS `sentence`, + `S`.`sentenceHash` AS `sentenceHash`, + `M`.`pageTitle` AS `pageTitle`, + `M`.`pageId` AS `pageID` + from + (`mathsemantics` `S` + join `mathIdMap` `M` ON ((`S`.`pageId` = `M`.`pageId`))) \ No newline at end of file diff --git a/db/snippets/most_common_meanings.sql b/db/snippets/most_common_meanings.sql new file mode 100644 index 0000000..94fe3b1 --- /dev/null +++ b/db/snippets/most_common_meanings.sql @@ -0,0 +1 @@ +select identifier, noun, count(*) FROM wikienmath.math_identifier group by identifier, noun order by count(noun) DESC; \ No newline at end of file diff --git a/runTestsOnVagrant b/runTestsOnVagrant new file mode 100644 index 0000000..ba9a10a --- /dev/null +++ b/runTestsOnVagrant @@ -0,0 +1,2 @@ +#/bin/bash +/vagrant/mediawiki/tests/phpunit/phpunit.php ./tests/ \ No newline at end of file -- To view, visit https://gerrit.wikimedia.org/r/115107 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I2400975895fca5d61babe8703a237d5038f156dc Gerrit-PatchSet: 3 Gerrit-Project: mediawiki/extensions/MathSearch Gerrit-Branch: master Gerrit-Owner: Physikerwelt <w...@physikerwelt.de> Gerrit-Reviewer: Physikerwelt <w...@physikerwelt.de> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits