jenkins-bot has submitted this change and it was merged. Change subject: Replace pageIds by revisionIds ......................................................................
Replace pageIds by revisionIds Change-Id: I1515abf2a01ec0153bb67cf00ec72cc56ca82142 --- M MathObject.php M MathSearch.hooks.php M db/mathobservation.sql D db/mathpagestat.sql A db/mathrevisionstat.sql M db/snippets/CosProd.sql M db/snippets/getNorm.sql M maintenance/CalculateDistances.php M maintenance/ExtractFeatures.php M maintenance/GenerateFeatureTable.php 10 files changed, 51 insertions(+), 46 deletions(-) Approvals: Physikerwelt: Looks good to me, approved jenkins-bot: Verified diff --git a/MathObject.php b/MathObject.php index f00f7f5..bc2670c 100644 --- a/MathObject.php +++ b/MathObject.php @@ -104,14 +104,14 @@ global $wgOut; $dbr = wfGetDB( DB_SLAVE ); try { - $res = $dbr->select( array( "mathobservation", "mathvarstat", 'mathpagestat' ) + $res = $dbr->select( array( "mathobservation", "mathvarstat", 'mathrevisionstat' ) , array( "mathobservation_featurename", "mathobservation_featuretype", 'varstat_featurecount', - 'pagestat_featurecount', "count(*) as localcnt" ), + 'revstat_featurecount', "count(*) as localcnt" ), array( "mathobservation_inputhash" => $this->getInputHash(), 'varstat_featurename = mathobservation_featurename', 'varstat_featuretype = mathobservation_featuretype', - 'pagestat_pageid' => $this->getRevisionID(), - 'pagestat_featureid = varstat_id' + 'revstat_revid' => $this->getRevisionID(), + 'revstat_featureid = varstat_id' ) , __METHOD__, array( 'GROUP BY' => 'mathobservation_featurename', 'ORDER BY' => 'varstat_featurecount' ) @@ -180,12 +180,15 @@ $dbgiven = true; } $dbw->delete( "mathobservation", array( "mathobservation_inputhash" => $this->getInputHash() ) ); + wfDebugLog('MathSearch', 'delete obervations for '.bin2hex($this->getInputHash())); foreach ( $rule as $feature ) { $dbw->insert( "mathobservation", array( "mathobservation_inputhash" => $this->getInputHash(), "mathobservation_featurename" => utf8_encode( trim( $feature[ 4 ] ) ), "mathobservation_featuretype" => utf8_encode( $feature[ 1 ] ), ) ); + wfDebugLog('MathSearch', 'insert observation for '.bin2hex($this->getInputHash()) + . utf8_encode( trim( $feature[ 4 ] ) )); } if ( !$dbgiven ) { $dbw->commit(); @@ -339,8 +342,8 @@ . "JOIN mathindex ON `mathobservation_inputhash` = mathindex_inputhash\n" . "GROUP BY `mathobservation_featurename` , `mathobservation_featuretype`\n" . "ORDER BY CNT DESC"); - $dbw->query( 'TRUNCATE TABLE `mathpagestat`' ); - $dbw->query( 'INSERT INTO `mathpagestat`(`pagestat_featureid`,`pagestat_pageid`,`pagestat_featurecount`) ' + $dbw->query( 'TRUNCATE TABLE `mathrevisionstat`' ); + $dbw->query( 'INSERT INTO `mathrevisionstat`(`revstat_featureid`,`revstat_revid`,`revstat_featurecount`) ' . 'SELECT varstat_id, mathindex_revision_id, count(*) AS CNT FROM `mathobservation` ' . 'JOIN mathindex ON `mathobservation_inputhash` = mathindex_inputhash ' . 'JOIN mathvarstat ON varstat_featurename = `mathobservation_featurename` AND varstat_featuretype = `mathobservation_featuretype` ' diff --git a/MathSearch.hooks.php b/MathSearch.hooks.php index 559a91a..169c1c2 100644 --- a/MathSearch.hooks.php +++ b/MathSearch.hooks.php @@ -26,7 +26,7 @@ $updater->addExtensionTable( 'mathindex', $dir . 'mathindex.sql' ); $updater->addExtensionTable( 'mathobservation', $dir . 'mathobservation.sql' ); $updater->addExtensionTable( 'mathvarstat', $dir . 'mathvarstat.sql' ); - $updater->addExtensionTable( 'mathpagestat', $dir . 'mathpagestat.sql' ); + $updater->addExtensionTable( 'mathrevisionstat', $dir . 'mathrevisionstat.sql' ); $updater->addExtensionTable( 'mathsemantics', $dir . 'mathsemantics.sql' ); $updater->addExtensionTable( 'mathperformance', $dir . 'mathperformance.sql' ); $updater->addExtensionTable( 'mathidentifier', $dir . 'mathidentifier.sql' ); @@ -85,7 +85,8 @@ 'mathindex_inputhash' => $inputHash) ) ; if ( $exists ) { - wfDebugLog( "MathSearch", 'Index $' . $tex . '$ already in database.' ); + wfDebugLog( 'MathSearch', 'Index $' . $tex . '$ already in database.'); + wfDebugLog( 'MathSearch', "$revId-$eid with hash ". bin2hex($inputHash) ); } else { self::writeMathIndex( $revId, $eid, $inputHash, $tex ); } diff --git a/db/mathobservation.sql b/db/mathobservation.sql index 21db97e..3be2af2 100644 --- a/db/mathobservation.sql +++ b/db/mathobservation.sql @@ -7,10 +7,10 @@ -- Binary MD5 hash of the latex fragment, used as an identifier key. mathobservation_inputhash varbinary(16) NOT NULL, - --Type of the feature e.g. mo, mi + -- Type of the feature e.g. mo, mi mathobservation_featuretype varchar(10) NOT NULL, - --Name of the feature. eg name of the variable + -- Name of the feature. eg name of the variable mathobservation_featurename varchar(10) NOT NULL, -- Timestamp. Is set by the database autmatically diff --git a/db/mathpagestat.sql b/db/mathpagestat.sql deleted file mode 100644 index d54e779..0000000 --- a/db/mathpagestat.sql +++ /dev/null @@ -1,11 +0,0 @@ --- --- Used by the math search module to analyse the variables in the equations. --- -CREATE TABLE /*_*/mathpagestat ( - pagestat_pageid int(10) NOT NULL, - pagestat_featureid int(6) NOT NULL, - pagestat_featurecount int(11) NOT NULL, - PRIMARY KEY (pagestat_pageid,pagestat_featureid), - KEY `pagestat_pageid` (`pagestat_pageid`) -) /*$wgDBTableOptions*/; - diff --git a/db/mathrevisionstat.sql b/db/mathrevisionstat.sql new file mode 100644 index 0000000..33574eb --- /dev/null +++ b/db/mathrevisionstat.sql @@ -0,0 +1,12 @@ +-- +-- Used by the math search module to analyse the variables in the equations. +-- +CREATE TABLE /*_*/mathrevisionstat ( + revstat_revid int(10) NOT NULL, + revstat_featureid int(6) NOT NULL, + revstat_featurecount int(11) NOT NULL, + PRIMARY KEY (revstat_revid,revstat_featureid), + FOREIGN KEY `revision` ( revstat_revid ) REFERENCES revision( rev_id ), + FOREIGN KEY `featureID` ( revstat_featureid ) REFERENCES mathvarstat ( varstat_featurename ) +) /*$wgDBTableOptions*/; + diff --git a/db/snippets/CosProd.sql b/db/snippets/CosProd.sql index b9add42..fa52f33 100644 --- a/db/snippets/CosProd.sql +++ b/db/snippets/CosProd.sql @@ -8,10 +8,10 @@ return ( SELECT SUM(CAST(LOG( a.`pagestat_featurecount`)* LOG(b.`pagestat_featurecount`) as DECIMAL(20,10)) /(LOG(varstat_featurecount)*LOG(varstat_featurecount)) )/(getNorm(IDA)* getNorm(IDB)) - from mathpagestat as a, mathpagestat as b, mathvarstat as s - WHERE (b.`pagestat_pageid`= IDA and a.`pagestat_pageid`=IDB - and a.`pagestat_featureid`=b.`pagestat_featureid` - and a.`pagestat_featureid`=s.varstat_id) + from mathrevisionstat as a, mathrevisionstat as b, mathvarstat as s + WHERE (b.revstat_revid= IDA and a.revstat_revid=IDB + and a.revstat_featureid=b.revstat_featureid + and a.revstat_featureid=s.varstat_id) ); END$$ diff --git a/db/snippets/getNorm.sql b/db/snippets/getNorm.sql index b0581c2..3b0120c 100644 --- a/db/snippets/getNorm.sql +++ b/db/snippets/getNorm.sql @@ -7,9 +7,9 @@ DECLARE output DECIMAL(20,10); SELECT SUM(POW(LOG(CAST(`pagestat_featurecount`as decimal(20,10)))/LOG(varstat_featurecount),2)) as norm INTO output -FROM `mathpagestat` -JOIN mathvarstat on `pagestat_featureid` = varstat_id -WHERE `pagestat_pageid` =pid order by norm desc; +FROM mathrevisionstat +JOIN mathvarstat on revstat_featureid = varstat_id +WHERE revstat_revid =pid order by norm desc; return POW(output,1/2); END$$ diff --git a/maintenance/CalculateDistances.php b/maintenance/CalculateDistances.php index d377aa8..87afb42 100644 --- a/maintenance/CalculateDistances.php +++ b/maintenance/CalculateDistances.php @@ -56,18 +56,18 @@ $this->pagelist = array(); $min = $this->getArg( 0, 0 ); $max = $this->getArg( 1, PHP_INT_MAX ); - $conds = "pagestat_pageid >= $min"; + $conds = "revstat_revid >= $min"; if ( $max < PHP_INT_MAX ) { - $conds .= " AND pagestat_pageid <= $max"; + $conds .= " AND revstat_revid <= $max"; } if ( $this->getOption( 'page9', false ) ) { $res = - $this->db->select( array( 'mathpage9', 'mathpagestat' ), - array( 'page_id', 'pagestat_pageid' ), - $conds . ' AND pagestat_pageid = page_id', __METHOD__, array( 'DISTINCT' ) ); + $this->db->select( array( 'mathpage9', 'mathrevisionstat' ), + array( 'page_id', 'revstat_revid' ), + $conds . ' AND revstat_revid = page_id', __METHOD__, array( 'DISTINCT' ) ); } else { $res = - $this->db->select( 'mathpagestat', 'pagestat_pageid', $conds, __METHOD__, + $this->db->select( 'mathrevisionstat', 'revstat_revid', $conds, __METHOD__, array( 'DISTINCT' ) ); } foreach ( $res as $row ) { @@ -94,14 +94,14 @@ $pid = $this->pagelist[$n]; $sql = "INSERT IGNORE INTO mathpagesimilarity(pagesimilarity_A,pagesimilarity_B,pagesimilarity_Value)\n" . - "SELECT DISTINCT $pid,`pagestat_pageid`,\n" . - "CosProd( $pid,`pagestat_pageid`) FROM `mathpagestat` m "; + "SELECT DISTINCT $pid,`revstat_revid`,\n" . + "CosProd( $pid,`revstat_revid`) FROM `mathrevisionstat` m "; if ( $this->getOption( 'page9', false ) ) { - $sql .= " JOIN (SELECT page_id from mathpage9) as r WHERE m.pagestat_pageid=r.page_id AND "; + $sql .= " JOIN (SELECT page_id from mathpage9) as r WHERE m.revstat_revid=r.page_id AND "; } else { $sql .= " WHERE "; } - $sql .= "m.pagestat_pageid < $pid "; + $sql .= "m.revstat_revid < $pid "; echo "writing entries for page $pid..."; $start = microtime( true ); $this->dbw->query( $sql ); diff --git a/maintenance/ExtractFeatures.php b/maintenance/ExtractFeatures.php index 1e55e97..ffe12f7 100644 --- a/maintenance/ExtractFeatures.php +++ b/maintenance/ExtractFeatures.php @@ -106,12 +106,12 @@ 'GROUP BY `mathobservation_featurename` , `mathobservation_featuretype`\n' . 'ORDER BY CNT DESC'; $this->dbw->query( $sql ); - $this->output( "Clear mathpagestat\n" ); - $sql = 'TRUNCATE TABLE `mathpagestat`'; + $this->output( "Clear mathrevisionstat\n" ); + $sql = 'TRUNCATE TABLE `mathrevisionstat`'; $this->dbw->query( $sql ); - $this->output( "Generate mathpagestat\n" ); + $this->output( "Generate mathrevisionstat\n" ); $sql = - 'INSERT INTO `mathpagestat`(`pagestat_featureid`,`pagestat_pageid`,`pagestat_featurecount`)\n' . + 'INSERT INTO `mathrevisionstat`(`revstat_featureid`,`revstat_revid`,`revstat_featurecount`)\n' . 'SELECT varstat_id, mathindex_revision_id, count(*) AS CNT FROM `mathobservation` JOIN mathindex ON `mathobservation_inputhash` =mathindex_inputhash\n' . 'JOIN mathvarstat ON varstat_featurename = `mathobservation_featurename` AND varstat_featuretype = `mathobservation_featuretype`\n' . ' GROUP BY `mathobservation_featurename`, `mathobservation_featuretype`,mathindex_revision_id ORDER BY CNT DESC'; diff --git a/maintenance/GenerateFeatureTable.php b/maintenance/GenerateFeatureTable.php index c719afb..035f2f5 100644 --- a/maintenance/GenerateFeatureTable.php +++ b/maintenance/GenerateFeatureTable.php @@ -92,15 +92,15 @@ private function doUpdate( $pid ) { // TODO: fix link id problem $res = - $this->db->select( array( 'mathpagestat', 'mathvarstat' ), array( - 'pagestat_pageid', + $this->db->select( array( 'mathrevisionstat', 'mathvarstat' ), array( + 'revstat_revid', 'pagestat_featurename', 'pagestat_featuretype', - 'pagestat_featurecount', + 'revstat_featurecount', 'varstat_id', 'varstat_featurecount' ), array( - 'pagestat_pageid' => $pid, + 'revstat_revid' => $pid, 'pagestat_featurename = varstat_featurename', 'pagestat_featuretype=varstat_featuretype' ), __METHOD__ ); -- To view, visit https://gerrit.wikimedia.org/r/193101 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I1515abf2a01ec0153bb67cf00ec72cc56ca82142 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/MathSearch Gerrit-Branch: master Gerrit-Owner: Physikerwelt <w...@physikerwelt.de> Gerrit-Reviewer: Physikerwelt <w...@physikerwelt.de> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits