Aaron Schulz has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/307057

Change subject: Add more estimation modes to pendingWriteQueryDuration()
......................................................................

Add more estimation modes to pendingWriteQueryDuration()

* Use this to avoid harmless queries that happen to block on
  row-level locks for a long time. This does not applies to
  UPDATE/DELETE.
* Update commitMasterChanges() and JobRunner to use the new
  mode to pointless rollback or lag checks.

Change-Id: Ifc2743f2d8cd109840c45cda5028fbb4df55d231
---
M includes/db/DBConnRef.php
M includes/db/Database.php
M includes/db/IDatabase.php
M includes/db/loadbalancer/LoadBalancer.php
M includes/jobqueue/JobRunner.php
5 files changed, 127 insertions(+), 34 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/57/307057/1

diff --git a/includes/db/DBConnRef.php b/includes/db/DBConnRef.php
index 790a073..86d40f4 100644
--- a/includes/db/DBConnRef.php
+++ b/includes/db/DBConnRef.php
@@ -103,7 +103,7 @@
                return $this->__call( __FUNCTION__, func_get_args() );
        }
 
-       public function pendingWriteQueryDuration() {
+       public function pendingWriteQueryDuration( $type = self::ESTIMATE_TOTAL 
) {
                return $this->__call( __FUNCTION__, func_get_args() );
        }
 
@@ -477,8 +477,10 @@
                return $this->__call( __FUNCTION__, func_get_args() );
        }
 
-       public function ping() {
-               return $this->__call( __FUNCTION__, func_get_args() );
+       public function ping( &$rtt = null ) {
+               return func_num_args()
+                       ? $this->__call( __FUNCTION__, [ &$rtt ] )
+                       : $this->__call( __FUNCTION__, [] ); // method cares 
about null vs missing
        }
 
        public function getLag() {
diff --git a/includes/db/Database.php b/includes/db/Database.php
index e07836b..9fae6b5 100644
--- a/includes/db/Database.php
+++ b/includes/db/Database.php
@@ -39,6 +39,7 @@
 
        /** How long before it is worth doing a dummy query to test the 
connection */
        const PING_TTL = 1.0;
+       const PING_QUERY = 'SELECT 1 AS ping';
 
        /** @var string SQL query */
        protected $mLastQuery = '';
@@ -102,7 +103,6 @@
         * @var int
         */
        protected $mTrxLevel = 0;
-
        /**
         * Either a short hexidecimal string if a transaction is active or ""
         *
@@ -110,7 +110,6 @@
         * @see DatabaseBase::mTrxLevel
         */
        protected $mTrxShortId = '';
-
        /**
         * The UNIX time that the transaction started. Callers can assume that 
if
         * snapshot isolation is used, then the data is *at least* up to date 
to that
@@ -120,10 +119,8 @@
         * @see DatabaseBase::mTrxLevel
         */
        private $mTrxTimestamp = null;
-
        /** @var float Lag estimate at the time of BEGIN */
        private $mTrxSlaveLag = null;
-
        /**
         * Remembers the function name given for starting the most recent 
transaction via begin().
         * Used to provide additional context for error reporting.
@@ -132,7 +129,6 @@
         * @see DatabaseBase::mTrxLevel
         */
        private $mTrxFname = null;
-
        /**
         * Record if possible write queries were done in the last transaction 
started
         *
@@ -140,7 +136,6 @@
         * @see DatabaseBase::mTrxLevel
         */
        private $mTrxDoneWrites = false;
-
        /**
         * Record if the current transaction was started implicitly due to 
DBO_TRX being set.
         *
@@ -148,34 +143,44 @@
         * @see DatabaseBase::mTrxLevel
         */
        private $mTrxAutomatic = false;
-
        /**
         * Array of levels of atomicity within transactions
         *
         * @var array
         */
        private $mTrxAtomicLevels = [];
-
        /**
         * Record if the current transaction was started implicitly by 
DatabaseBase::startAtomic
         *
         * @var bool
         */
        private $mTrxAutomaticAtomic = false;
-
        /**
         * Track the write query callers of the current transaction
         *
         * @var string[]
         */
        private $mTrxWriteCallers = [];
-
        /**
-        * Track the seconds spent in write queries for the current transaction
-        *
-        * @var float
+        * @var float Seconds spent in write queries for the current transaction
         */
        private $mTrxWriteDuration = 0.0;
+       /**
+        * @var integer Number of write queries for the current transaction
+        */
+       private $mTrxWriteQueryCount = 0;
+       /**
+        * @var float Like mTrxWriteQueryCount but excludes lock-bound, easy to 
replicate, queries
+        */
+       private $mTrxWriteAdjDuration = 0.0;
+       /**
+        * @var integer Number of write queries counted in mTrxWriteAdjDuration
+        */
+       private $mTrxWriteAdjQueryCount = 0;
+       /**
+        * @var float RTT time estimate
+        */
+       private $mRTTEstimate = 0.0;
 
        /** @var array Map of (name => 1) for locks obtained via lock() */
        private $mNamedLocksHeld = [];
@@ -421,8 +426,30 @@
                );
        }
 
-       public function pendingWriteQueryDuration() {
-               return $this->mTrxLevel ? $this->mTrxWriteDuration : false;
+       public function pendingWriteQueryDuration( $type = self::ESTIMATE_TOTAL 
) {
+               if ( !$this->mTrxLevel ) {
+                       return false;
+               } elseif ( !$this->mTrxDoneWrites ) {
+                       return 0.0;
+               }
+
+               switch ( $type ) {
+                       case self::ESTIMATE_NETWORK:
+                               $this->ping( $rtt );
+
+                               return $this->mTrxWriteQueryCount * $rtt;
+                       case self::ESTIMATE_DB_APPLY:
+                               $this->ping( $rtt );
+                               $rttAdjTotal = $this->mTrxWriteAdjQueryCount * 
$rtt;
+                               $applyTime = max( $this->mTrxWriteAdjDuration - 
$rttAdjTotal, 0 );
+                               // For ommitted queries, make them count as 
something at least
+                               $omitted = $this->mTrxWriteQueryCount - 
$this->mTrxWriteAdjQueryCount;
+                               $applyTime += .010 * $omitted;
+
+                               return $applyTime;
+                       default: // everything
+                               return $this->mTrxWriteDuration;
+               }
        }
 
        public function pendingWriteCallers() {
@@ -946,16 +973,20 @@
 
                $startTime = microtime( true );
                $ret = $this->doQuery( $commentedSql );
-               $queryRuntime = microtime( true ) - $startTime;
+               $queryRuntime = max( microtime( true ) - $startTime, 0.0 );
 
                unset( $queryProfSection ); // profile out (if set)
 
                if ( $ret !== false ) {
                        $this->lastPing = $startTime;
                        if ( $isWrite && $this->mTrxLevel ) {
-                               $this->mTrxWriteDuration += $queryRuntime;
+                               $this->updateTrxWriteQueryTime( $sql, 
$queryRuntime );
                                $this->mTrxWriteCallers[] = $fname;
                        }
+               }
+
+               if ( $sql === self::PING_QUERY ) {
+                       $this->mRTTEstimate = $queryRuntime;
                }
 
                $this->getTransactionProfiler()->recordQueryCompletion(
@@ -964,6 +995,41 @@
                MWDebug::query( $sql, $fname, $isMaster, $queryRuntime );
 
                return $ret;
+       }
+
+       /**
+        * Update the estimated run-time of a query, not counting large row 
lock times
+        *
+        * LoadBalancer can be set to rollback transactions that will create 
huge replication
+        * lag. It bases this estimate off of pendingWriteQueryDuration(). 
Certain simple
+        * queries, like inserting a row can take a long time due to row 
locking. This method
+        * uses some simple heuristics to discount those cases.
+        *
+        * @param string $sql
+        * @param float $runtime Total runtime, including RTT
+        */
+       private function updateTrxWriteQueryTime( $sql, $runtime ) {
+               if ( $runtime < .500 ) {
+                       $isSane = true; // don't bother vetting
+               } elseif ( substr( $sql, 0, 7 ) === 'INSERT ' ) {
+                       // Usually insert(), upsert(), though could be 
insertSelect(). The first two are
+                       // fast unless bulky in size or blocked on locks. 
Likewise for the INSERT..SELECT,
+                       // unless the SELECT scans rows heavily only to find a 
few rows.
+                       $isSane = ( $this->affectedRows() > 50 );
+               } elseif ( substr( $sql, 0, 8 ) === 'REPLACE ' ) {
+                       // Usually replace(). This is a DELETE by unique 
indexes plus an INSERT.
+                       // It should only be slow if it is bulky.
+                       $isSane = ( $this->affectedRows() > 50 );
+               } else {
+                       $isSane = true;
+               }
+
+               $this->mTrxWriteDuration += $runtime;
+               $this->mTrxWriteQueryCount += 1;
+               if ( $isSane ) {
+                       $this->mTrxWriteAdjDuration += $runtime;
+                       $this->mTrxWriteAdjQueryCount += 1;
+               }
        }
 
        private function canRecoverFromDisconnect( $sql, $priorWritesPending ) {
@@ -2740,6 +2806,9 @@
                $this->mTrxAtomicLevels = [];
                $this->mTrxShortId = wfRandomString( 12 );
                $this->mTrxWriteDuration = 0.0;
+               $this->mTrxWriteQueryCount = 0;
+               $this->mTrxWriteAdjDuration = 0.0;
+               $this->mTrxWriteAdjQueryCount = 0;
                $this->mTrxWriteCallers = [];
                // First SELECT after BEGIN will establish the snapshot in 
REPEATABLE-READ.
                // Get an estimate of the slave lag before then, treating 
estimate staleness
@@ -2968,17 +3037,25 @@
                }
        }
 
-       public function ping() {
+       public function ping( &$rtt = null ) {
+               // Avoid hitting the server if it was hit recently
                if ( $this->isOpen() && ( microtime( true ) - $this->lastPing ) 
< self::PING_TTL ) {
-                       return true;
+                       if ( !func_num_args() ) {
+                               return true; // don't care about $rtt
+                       } elseif ( $this->mRTTEstimate > 0.0 ) {
+                               return $this->mRTTEstimate;
+                       }
                }
 
-               $ignoreErrors = true;
-               $this->clearFlag( DBO_TRX, self::REMEMBER_PRIOR );
                // This will reconnect if possible or return false if not
-               $ok = (bool)$this->query( "SELECT 1 AS ping", __METHOD__, 
$ignoreErrors );
+               $this->clearFlag( DBO_TRX, self::REMEMBER_PRIOR );
+               $ok = ( $this->query( self::PING_QUERY ) !== false );
                $this->restoreFlags( self::RESTORE_PRIOR );
 
+               if ( $ok ) {
+                       $rtt = $this->mRTTEstimate;
+               }
+
                return $ok;
        }
 
diff --git a/includes/db/IDatabase.php b/includes/db/IDatabase.php
index 1aa931e..a652b39 100644
--- a/includes/db/IDatabase.php
+++ b/includes/db/IDatabase.php
@@ -59,6 +59,13 @@
        /** @var string Restore to the initial flag state */
        const RESTORE_INITIAL = 'initial';
 
+       /** @var string Estimate total time (RTT, scanning, waiting on locks, 
applying) */
+       const ESTIMATE_TOTAL = 'total';
+       /** @var string Estimate time to apply (scanning, applying) */
+       const ESTIMATE_DB_APPLY = 'apply';
+       /** @var string Estimate time in network (RTT) */
+       const ESTIMATE_NETWORK = 'network';
+
        /**
         * A string describing the current software version, and possibly
         * other details in a user-friendly way. Will be listed on 
Special:Version, etc.
@@ -210,10 +217,11 @@
         *
         * High times could be due to scanning, updates, locking, and such
         *
+        * @param string $type IDatabase::ESTIMATE_* constant [default: 
ESTIMATE_ALL]
         * @return float|bool Returns false if not transaction is active
         * @since 1.26
         */
-       public function pendingWriteQueryDuration();
+       public function pendingWriteQueryDuration( $type = self::ESTIMATE_TOTAL 
);
 
        /**
         * Get the list of method names that did write queries for this 
transaction
@@ -1485,9 +1493,10 @@
        /**
         * Ping the server and try to reconnect if it there is no connection
         *
+        * @param float|null &$rtt Value to store the estimated RTT [optional]
         * @return bool Success or failure
         */
-       public function ping();
+       public function ping( &$rtt = null );
 
        /**
         * Get slave lag. Currently supported only by MySQL.
diff --git a/includes/db/loadbalancer/LoadBalancer.php 
b/includes/db/loadbalancer/LoadBalancer.php
index 65cd3b3..32729dd 100644
--- a/includes/db/loadbalancer/LoadBalancer.php
+++ b/includes/db/loadbalancer/LoadBalancer.php
@@ -1101,7 +1101,7 @@
                        }
                        // Assert that the time to replicate the transaction 
will be sane.
                        // If this fails, then all DB transactions will be 
rollback back together.
-                       $time = $conn->pendingWriteQueryDuration();
+                       $time = $conn->pendingWriteQueryDuration( 
$conn::ESTIMATE_DB_APPLY );
                        if ( $limit > 0 && $time > $limit ) {
                                throw new DBTransactionError(
                                        $conn,
diff --git a/includes/jobqueue/JobRunner.php b/includes/jobqueue/JobRunner.php
index 5f48dca..f25c1ba 100644
--- a/includes/jobqueue/JobRunner.php
+++ b/includes/jobqueue/JobRunner.php
@@ -503,16 +503,21 @@
                if ( $wgJobSerialCommitThreshold !== false && 
$lb->getServerCount() > 1 ) {
                        // Generally, there is one master connection to the 
local DB
                        $dbwSerial = $lb->getAnyOpenConnection( 
$lb->getWriterIndex() );
+                       // We need natively blocking fast locks
+                       if ( $dbwSerial && $dbwSerial->namedLocksEnqueue() ) {
+                               $time = $dbwSerial->pendingWriteQueryDuration( 
$dbwSerial::ESTIMATE_DB_APPLY );
+                               if ( $time < $wgJobSerialCommitThreshold ) {
+                                       $dbwSerial = false;
+                               }
+                       } else {
+                               $dbwSerial = false;
+                       }
                } else {
+                       // There are no slaves or writes are all to foreign DB 
(we don't handle that)
                        $dbwSerial = false;
                }
 
-               if ( !$dbwSerial
-                       || !$dbwSerial->namedLocksEnqueue()
-                       || $dbwSerial->pendingWriteQueryDuration() < 
$wgJobSerialCommitThreshold
-               ) {
-                       // Writes are all to foreign DBs, named locks don't 
form queues,
-                       // or $wgJobSerialCommitThreshold is not reached; 
commit changes now
+               if ( !$dbwSerial ) {
                        wfGetLBFactory()->commitMasterChanges( __METHOD__ );
                        return;
                }

-- 
To view, visit https://gerrit.wikimedia.org/r/307057
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ifc2743f2d8cd109840c45cda5028fbb4df55d231
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Aaron Schulz <asch...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to