jenkins-bot has submitted this change and it was merged.

Change subject: Replace pageIds by revisionIds
......................................................................


Replace pageIds by revisionIds

Change-Id: I1515abf2a01ec0153bb67cf00ec72cc56ca82142
---
M MathObject.php
M MathSearch.hooks.php
M db/mathobservation.sql
D db/mathpagestat.sql
A db/mathrevisionstat.sql
M db/snippets/CosProd.sql
M db/snippets/getNorm.sql
M maintenance/CalculateDistances.php
M maintenance/ExtractFeatures.php
M maintenance/GenerateFeatureTable.php
10 files changed, 51 insertions(+), 46 deletions(-)

Approvals:
  Physikerwelt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/MathObject.php b/MathObject.php
index f00f7f5..bc2670c 100644
--- a/MathObject.php
+++ b/MathObject.php
@@ -104,14 +104,14 @@
                global $wgOut;
                $dbr = wfGetDB( DB_SLAVE );
                try {
-                       $res = $dbr->select( array( "mathobservation", 
"mathvarstat", 'mathpagestat' )
+                       $res = $dbr->select( array( "mathobservation", 
"mathvarstat", 'mathrevisionstat' )
                                , array( "mathobservation_featurename", 
"mathobservation_featuretype", 'varstat_featurecount',
-                                       'pagestat_featurecount', "count(*) as 
localcnt" ),
+                                       'revstat_featurecount', "count(*) as 
localcnt" ),
                                        array( "mathobservation_inputhash" => 
$this->getInputHash(),
                                        'varstat_featurename = 
mathobservation_featurename',
                                        'varstat_featuretype = 
mathobservation_featuretype',
-                                       'pagestat_pageid' => 
$this->getRevisionID(),
-                                       'pagestat_featureid = varstat_id'
+                                       'revstat_revid' => 
$this->getRevisionID(),
+                                       'revstat_featureid = varstat_id'
                                )
                                , __METHOD__, array( 'GROUP BY' => 
'mathobservation_featurename',
                                        'ORDER BY' => 'varstat_featurecount' )
@@ -180,12 +180,15 @@
                        $dbgiven = true;
                }
                $dbw->delete( "mathobservation", array( 
"mathobservation_inputhash" => $this->getInputHash() ) );
+               wfDebugLog('MathSearch', 'delete obervations for 
'.bin2hex($this->getInputHash()));
                foreach ( $rule as $feature ) {
                        $dbw->insert( "mathobservation", array(
                                "mathobservation_inputhash" => 
$this->getInputHash(),
                                "mathobservation_featurename" => utf8_encode( 
trim( $feature[ 4 ] ) ),
                                "mathobservation_featuretype" => utf8_encode( 
$feature[ 1 ] ),
                        ) );
+                       wfDebugLog('MathSearch', 'insert observation for 
'.bin2hex($this->getInputHash())
+                       . utf8_encode( trim( $feature[ 4 ] )            ));
                }
                if ( !$dbgiven ) {
                        $dbw->commit();
@@ -339,8 +342,8 @@
                        . "JOIN mathindex ON `mathobservation_inputhash` = 
mathindex_inputhash\n"
                        . "GROUP BY `mathobservation_featurename` , 
`mathobservation_featuretype`\n"
                        . "ORDER BY CNT DESC");
-               $dbw->query( 'TRUNCATE TABLE `mathpagestat`' );
-               $dbw->query( 'INSERT INTO 
`mathpagestat`(`pagestat_featureid`,`pagestat_pageid`,`pagestat_featurecount`) '
+               $dbw->query( 'TRUNCATE TABLE `mathrevisionstat`' );
+               $dbw->query( 'INSERT INTO 
`mathrevisionstat`(`revstat_featureid`,`revstat_revid`,`revstat_featurecount`) '
                        . 'SELECT varstat_id, mathindex_revision_id, count(*) 
AS CNT FROM `mathobservation` '
                        . 'JOIN mathindex ON `mathobservation_inputhash` = 
mathindex_inputhash '
                        . 'JOIN mathvarstat ON varstat_featurename = 
`mathobservation_featurename` AND varstat_featuretype = 
`mathobservation_featuretype` '
diff --git a/MathSearch.hooks.php b/MathSearch.hooks.php
index 559a91a..169c1c2 100644
--- a/MathSearch.hooks.php
+++ b/MathSearch.hooks.php
@@ -26,7 +26,7 @@
                        $updater->addExtensionTable( 'mathindex', $dir . 
'mathindex.sql' );
                        $updater->addExtensionTable( 'mathobservation',  $dir . 
'mathobservation.sql' );
                        $updater->addExtensionTable( 'mathvarstat', $dir . 
'mathvarstat.sql' );
-                       $updater->addExtensionTable( 'mathpagestat', $dir . 
'mathpagestat.sql' );
+                       $updater->addExtensionTable( 'mathrevisionstat', $dir . 
'mathrevisionstat.sql' );
                        $updater->addExtensionTable( 'mathsemantics', $dir . 
'mathsemantics.sql' );
                        $updater->addExtensionTable( 'mathperformance', $dir . 
'mathperformance.sql' );
                        $updater->addExtensionTable( 'mathidentifier', $dir . 
'mathidentifier.sql' );
@@ -85,7 +85,8 @@
                                        'mathindex_inputhash' => $inputHash)
                        ) ;
                        if ( $exists ) {
-                               wfDebugLog( "MathSearch", 'Index $' . $tex . '$ 
already in database.' );
+                               wfDebugLog( 'MathSearch', 'Index $' . $tex . '$ 
already in database.');
+                               wfDebugLog( 'MathSearch', "$revId-$eid with 
hash ". bin2hex($inputHash) );
                        } else {
                                self::writeMathIndex( $revId, $eid, $inputHash, 
$tex );
                        }
diff --git a/db/mathobservation.sql b/db/mathobservation.sql
index 21db97e..3be2af2 100644
--- a/db/mathobservation.sql
+++ b/db/mathobservation.sql
@@ -7,10 +7,10 @@
   -- Binary MD5 hash of the latex fragment, used as an identifier key.
   mathobservation_inputhash varbinary(16) NOT NULL,
   
-  --Type of the feature e.g. mo, mi
+  -- Type of the feature e.g. mo, mi
   mathobservation_featuretype varchar(10) NOT NULL,
   
-  --Name of the feature. eg name of the variable
+  -- Name of the feature. eg name of the variable
   mathobservation_featurename varchar(10) NOT NULL,
 
   -- Timestamp. Is set by the database autmatically
diff --git a/db/mathpagestat.sql b/db/mathpagestat.sql
deleted file mode 100644
index d54e779..0000000
--- a/db/mathpagestat.sql
+++ /dev/null
@@ -1,11 +0,0 @@
---
--- Used by the math search module to analyse the variables in the equations.
---
-CREATE TABLE /*_*/mathpagestat (
-  pagestat_pageid int(10) NOT NULL,
-  pagestat_featureid int(6) NOT NULL,
-  pagestat_featurecount int(11) NOT NULL,
-  PRIMARY KEY (pagestat_pageid,pagestat_featureid),
-  KEY `pagestat_pageid` (`pagestat_pageid`)
-) /*$wgDBTableOptions*/;
-
diff --git a/db/mathrevisionstat.sql b/db/mathrevisionstat.sql
new file mode 100644
index 0000000..33574eb
--- /dev/null
+++ b/db/mathrevisionstat.sql
@@ -0,0 +1,12 @@
+-- Per-revision feature counts used by the math search module to analyse the
+-- variables in the equations. One row per (revstat_revid, revstat_featureid);
+-- revstat_featureid holds a mathvarstat.varstat_id and is joined on that column.
+CREATE TABLE /*_*/mathrevisionstat (
+  revstat_revid int(10) NOT NULL,
+  revstat_featureid int(6) NOT NULL,
+  revstat_featurecount int(11) NOT NULL,
+  PRIMARY KEY (revstat_revid,revstat_featureid),
+  FOREIGN KEY `revision` ( revstat_revid ) REFERENCES revision( rev_id ),
+  FOREIGN KEY `featureID` ( revstat_featureid ) REFERENCES mathvarstat ( varstat_id )
+) /*$wgDBTableOptions*/;
+
diff --git a/db/snippets/CosProd.sql b/db/snippets/CosProd.sql
index b9add42..fa52f33 100644
--- a/db/snippets/CosProd.sql
+++ b/db/snippets/CosProd.sql
@@ -8,10 +8,10 @@
 return (
        SELECT SUM(CAST(LOG( a.`pagestat_featurecount`)* 
LOG(b.`pagestat_featurecount`) as DECIMAL(20,10))
                /(LOG(varstat_featurecount)*LOG(varstat_featurecount)) 
)/(getNorm(IDA)* getNorm(IDB))
-       from mathpagestat as a, mathpagestat as b,  mathvarstat as s
-       WHERE (b.`pagestat_pageid`= IDA and a.`pagestat_pageid`=IDB 
-       and a.`pagestat_featureid`=b.`pagestat_featureid` 
-       and a.`pagestat_featureid`=s.varstat_id)
+       from mathrevisionstat as a, mathrevisionstat as b,  mathvarstat as s
+       WHERE (b.revstat_revid= IDA and a.revstat_revid=IDB
+       and a.revstat_featureid=b.revstat_featureid
+       and a.revstat_featureid=s.varstat_id)
 );
 END$$
 
diff --git a/db/snippets/getNorm.sql b/db/snippets/getNorm.sql
index b0581c2..3b0120c 100644
--- a/db/snippets/getNorm.sql
+++ b/db/snippets/getNorm.sql
@@ -7,9 +7,9 @@
 DECLARE output DECIMAL(20,10);
 SELECT SUM(POW(LOG(CAST(`pagestat_featurecount`as 
decimal(20,10)))/LOG(varstat_featurecount),2)) as norm
 INTO output
-FROM `mathpagestat`
-JOIN mathvarstat on `pagestat_featureid` = varstat_id
-WHERE `pagestat_pageid` =pid order by norm desc;
+FROM mathrevisionstat
+JOIN mathvarstat on revstat_featureid = varstat_id
+WHERE revstat_revid =pid order by norm desc;
 return POW(output,1/2);
 END$$
 
diff --git a/maintenance/CalculateDistances.php 
b/maintenance/CalculateDistances.php
index d377aa8..87afb42 100644
--- a/maintenance/CalculateDistances.php
+++ b/maintenance/CalculateDistances.php
@@ -56,18 +56,18 @@
                $this->pagelist = array();
                $min = $this->getArg( 0, 0 );
                $max = $this->getArg( 1, PHP_INT_MAX );
-               $conds = "pagestat_pageid >= $min";
+               $conds = "revstat_revid >= $min";
                if ( $max < PHP_INT_MAX ) {
-                       $conds .= " AND pagestat_pageid <= $max";
+                       $conds .= " AND revstat_revid <= $max";
                }
                if ( $this->getOption( 'page9', false ) ) {
                        $res =
-                               $this->db->select( array( 'mathpage9', 
'mathpagestat' ),
-                                       array( 'page_id', 'pagestat_pageid' ),
-                                       $conds . ' AND pagestat_pageid = 
page_id', __METHOD__, array( 'DISTINCT' ) );
+                               $this->db->select( array( 'mathpage9', 
'mathrevisionstat' ),
+                                       array( 'page_id', 'revstat_revid' ),
+                                       $conds . ' AND revstat_revid = 
page_id', __METHOD__, array( 'DISTINCT' ) );
                } else {
                        $res =
-                               $this->db->select( 'mathpagestat', 
'pagestat_pageid', $conds, __METHOD__,
+                               $this->db->select( 'mathrevisionstat', 
'revstat_revid', $conds, __METHOD__,
                                        array( 'DISTINCT' ) );
                }
                foreach ( $res as $row ) {
@@ -94,14 +94,14 @@
                                $pid = $this->pagelist[$n];
                                $sql =
                                        "INSERT IGNORE INTO 
mathpagesimilarity(pagesimilarity_A,pagesimilarity_B,pagesimilarity_Value)\n" .
-                                       "SELECT DISTINCT 
$pid,`pagestat_pageid`,\n" .
-                                       "CosProd( $pid,`pagestat_pageid`) FROM 
`mathpagestat` m ";
+                                       "SELECT DISTINCT 
$pid,`revstat_revid`,\n" .
+                                       "CosProd( $pid,`revstat_revid`) FROM 
`mathrevisionstat` m ";
                                if ( $this->getOption( 'page9', false ) ) {
-                                       $sql .= " JOIN (SELECT page_id from 
mathpage9) as r WHERE m.pagestat_pageid=r.page_id AND ";
+                                       $sql .= " JOIN (SELECT page_id from 
mathpage9) as r WHERE m.revstat_revid=r.page_id AND ";
                                } else {
                                        $sql .= " WHERE ";
                                }
-                               $sql .= "m.pagestat_pageid < $pid ";
+                               $sql .= "m.revstat_revid < $pid ";
                                echo "writing entries for page $pid...";
                                $start = microtime( true );
                                $this->dbw->query( $sql );
diff --git a/maintenance/ExtractFeatures.php b/maintenance/ExtractFeatures.php
index 1e55e97..ffe12f7 100644
--- a/maintenance/ExtractFeatures.php
+++ b/maintenance/ExtractFeatures.php
@@ -106,12 +106,12 @@
                        'GROUP BY `mathobservation_featurename` , 
`mathobservation_featuretype`\n' .
                        'ORDER BY CNT DESC';
                $this->dbw->query( $sql );
-               $this->output( "Clear mathpagestat\n" );
-               $sql = 'TRUNCATE TABLE `mathpagestat`';
+               $this->output( "Clear mathrevisionstat\n" );
+               $sql = 'TRUNCATE TABLE `mathrevisionstat`';
                $this->dbw->query( $sql );
-               $this->output( "Generate mathpagestat\n" );
+               $this->output( "Generate mathrevisionstat\n" );
                $sql =
-                       'INSERT INTO 
`mathpagestat`(`pagestat_featureid`,`pagestat_pageid`,`pagestat_featurecount`)\n'
 .
+                       'INSERT INTO 
`mathrevisionstat`(`revstat_featureid`,`revstat_revid`,`revstat_featurecount`)\n'
 .
                        'SELECT varstat_id, mathindex_revision_id, count(*) AS 
CNT FROM `mathobservation` JOIN mathindex ON `mathobservation_inputhash` 
=mathindex_inputhash\n' .
                        'JOIN mathvarstat ON varstat_featurename = 
`mathobservation_featurename` AND varstat_featuretype = 
`mathobservation_featuretype`\n' .
                        ' GROUP BY `mathobservation_featurename`, 
`mathobservation_featuretype`,mathindex_revision_id ORDER BY CNT DESC';
diff --git a/maintenance/GenerateFeatureTable.php 
b/maintenance/GenerateFeatureTable.php
index c719afb..035f2f5 100644
--- a/maintenance/GenerateFeatureTable.php
+++ b/maintenance/GenerateFeatureTable.php
@@ -92,15 +92,15 @@
        private function doUpdate( $pid ) {
                // TODO: fix link id problem
                $res =
-                       $this->db->select( array( 'mathpagestat', 'mathvarstat' 
), array(
-                                       'pagestat_pageid',
+                       $this->db->select( array( 'mathrevisionstat', 
'mathvarstat' ), array(
+                                       'revstat_revid',
                                        'pagestat_featurename',
                                        'pagestat_featuretype',
-                                       'pagestat_featurecount',
+                                       'revstat_featurecount',
                                        'varstat_id',
                                        'varstat_featurecount'
                                ), array(
-                                       'pagestat_pageid' => $pid,
+                                       'revstat_revid' => $pid,
                                        'pagestat_featurename = 
varstat_featurename',
                                        
'pagestat_featuretype=varstat_featuretype'
                                ), __METHOD__ );

-- 
To view, visit https://gerrit.wikimedia.org/r/193101
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I1515abf2a01ec0153bb67cf00ec72cc56ca82142
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/MathSearch
Gerrit-Branch: master
Gerrit-Owner: Physikerwelt <w...@physikerwelt.de>
Gerrit-Reviewer: Physikerwelt <w...@physikerwelt.de>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to