https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104263

Revision: 104263
Author:   aaron
Date:     2011-11-26 07:07:50 +0000 (Sat, 26 Nov 2011)
Log Message:
-----------
* Optimized DBFileLockManager::doLockingQueryAll with a quorum and better use 
of cache
* Various fixes after more testing

Modified Paths:
--------------
    branches/FileBackend/phase3/includes/filerepo/backend/FileBackend.php
    
branches/FileBackend/phase3/includes/filerepo/backend/FileBackendMultiWrite.php
    branches/FileBackend/phase3/includes/filerepo/backend/FileLockManager.php

Modified: branches/FileBackend/phase3/includes/filerepo/backend/FileBackend.php
===================================================================
--- branches/FileBackend/phase3/includes/filerepo/backend/FileBackend.php       
2011-11-26 00:32:30 UTC (rev 104262)
+++ branches/FileBackend/phase3/includes/filerepo/backend/FileBackend.php       
2011-11-26 07:07:50 UTC (rev 104263)
@@ -65,6 +65,8 @@
         *     )
         * );
         * </code>
+        * If any serious errors occur, the operations will be rolled back.
+        * However, the 'ignoreErrors' parameter can be used on any operation 
to ignore errors.
         * 
         * @param Array $ops Array of arrays containing N operations to execute 
IN ORDER
         * @return Status
@@ -321,6 +323,7 @@
                                // Get params for this operation
                                $params = $operation;
                                unset( $params['operation'] ); // don't need 
this
+                               unset( $params['ignoreErrors'] ); // don't need 
this
                                // Append the FileOp class
                                $performOps[] = new $class( $params );
                        } else {
@@ -342,7 +345,6 @@
                foreach ( $performOps as $index => $fileOp ) {
                        $filesToLock = array_merge( $filesToLock, 
$fileOp->storagePathsToLock() );
                }
-               $filesToLock = array_unique( $filesToLock ); // avoid warnings
 
                // Try to lock those files...
                $status->merge( $this->lockFiles( $filesToLock ) );

Modified: 
branches/FileBackend/phase3/includes/filerepo/backend/FileBackendMultiWrite.php
===================================================================
--- 
branches/FileBackend/phase3/includes/filerepo/backend/FileBackendMultiWrite.php 
    2011-11-26 00:32:30 UTC (rev 104262)
+++ 
branches/FileBackend/phase3/includes/filerepo/backend/FileBackendMultiWrite.php 
    2011-11-26 07:07:50 UTC (rev 104263)
@@ -79,7 +79,6 @@
                                foreach ( $performOps as $index => $fileOp ) {
                                        $filesToLock = array_merge( 
$filesToLock, $fileOp->storagePathsToLock() );
                                }
-                               $filesToLock = array_unique( $filesToLock ); // 
avoid warnings
                        }
                }
 

Modified: 
branches/FileBackend/phase3/includes/filerepo/backend/FileLockManager.php
===================================================================
--- branches/FileBackend/phase3/includes/filerepo/backend/FileLockManager.php   
2011-11-26 00:32:30 UTC (rev 104262)
+++ branches/FileBackend/phase3/includes/filerepo/backend/FileLockManager.php   
2011-11-26 07:07:50 UTC (rev 104263)
@@ -28,7 +28,7 @@
         * @return Status 
         */
        final public function lock( array $paths, $type = self::LOCK_EX ) {
-               $keys = array_map( 'sha1', $paths );
+               $keys = array_unique( array_map( 'sha1', $paths ) );
                return $this->doLock( $keys, $type );
        }
 
@@ -39,7 +39,7 @@
         * @return Status 
         */
        final public function unlock( array $paths ) {
-               $keys = array_map( 'sha1', $paths );
+               $keys = array_unique( array_map( 'sha1', $paths ) );
                return $this->doUnlock( $keys, 0 );
        }
 
@@ -80,7 +80,7 @@
                $this->lockDir = $config['lockDirectory'];
        }
 
-       function doLock( array $keys, $type ) {
+       protected function doLock( array $keys, $type ) {
                $status = Status::newGood();
 
                $lockedKeys = array(); // files locked in this attempt
@@ -103,7 +103,7 @@
                return $status;
        }
 
-       function doUnlock( array $keys, $type ) {
+       protected function doUnlock( array $keys, $type ) {
                $status = Status::newGood();
 
                foreach ( $keys as $key ) {
@@ -224,9 +224,9 @@
  *
  * All lock requests for a resource, identified by a hash string, will
  * map to one bucket. Each bucket maps to one or several peer DB servers,
- * each of which have a `file_locks` table with row-level locking.
+ * each having a `file_locks` table with row-level locking.
  *
- * All peer servers must agree to a lock in order for it to be acquired.
+ * A majority of peer servers must agree for a lock to be acquired.
  * As long as one peer server is up, lock requests will not be blocked
  * just because another peer server cannot be contacted. A global status
  * cache can be setup to track servers that recently missed queries; such
@@ -242,6 +242,7 @@
        /** @var BagOStuff */
        protected $statusCache;
 
+       protected $trustCache; // boolean
        protected $webTimeout; // integer number of seconds
        protected $cliTimeout; // integer number of seconds
        protected $safeDelay; // integer number of seconds
@@ -254,18 +255,21 @@
        /**
         * Construct a new instance from configuration.
         * $config paramaters include:
-        *     'dbsByBucket' : Array of 1-16 consecutive integer keys, starting 
from 0,
-        *                     with a list of database names (peers) as values.
-        *                     Each DB should be on its own server.
-        *     'statusCache' : $wgMemc if set to a global memcached instance. 
[optional]
-        *                     This tracks peer servers that couldn't be 
queried recently.
-        *     'webTimeout'  : Connection timeout (seconds) for non-CLI 
scripts. [optional]
-        *                     This tells the DB server how long to wait before 
giving up
-        *                     and releasing all the locks made in a session 
transaction.
-        *     'cliTimeout'  : Connection timeout (seconds) for CLI scripts. 
[optional]
+        *     'dbsByBucket' : Array of 1-16 consecutive integer keys, starting 
from 0, with
+        *                     a list of DB names (peers) as values. Each list 
should have
+        *                     an odd number of items and each DB should have 
its own server.
+        *     'webTimeout'  : Lock timeout (seconds) for non-CLI scripts. 
[optional]
+        *                     This tells the DB server how long to wait before 
assuming
+        *                     connection failure and releasing all the locks 
for a session.
+        *     'cliTimeout'  : Lock timeout (seconds) for CLI scripts. 
[optional]
+        *                     This tells the DB server how long to wait before 
assuming
+        *                     connection failure and releasing all the locks 
for a session.
         *     'safeDelay'   : Seconds to mistrust a DB after restart/query 
loss. [optional]
         *                     This should reflect the highest 
max_execution_time that PHP
         *                     scripts might use on a wiki. Locks are lost on 
server restart.
+        *     'cache'       : $wgMemc (if set to a global memcached instance). 
[optional]
+        *                     This tracks peer servers that couldn't be 
queried recently.
+        *     'trustCache'  : Assume cache knows all servers missing queries 
recently. [optional]
         *
         * @param Array $config 
         */
@@ -280,25 +284,28 @@
                                wfWarn( "No valid key for bucket $i in 
dbsByBucket." );
                        }
                }
-               if ( isset( $config['statusCache'] ) ) {
-                       $this->statusCache = $config['statusCache'];
-               }
                if ( isset( $config['webTimeout'] ) ) {
                        $this->webTimeout = $config['webTimeout'];
                } else {
-                       $this->webTimeout = ini_get( 'max_execution_time' ) // 
disallow 0
-                               ? ini_get( 'max_execution_time' )
-                               : 60; // some sane number
+                       $met = ini_get( 'max_execution_time' );
+                       $this->webTimeout = $met ? $met : 60; // use some same 
amount if 0
                }
                $this->cliTimeout = isset( $config['cliTimeout'] )
                        ? $config['cliTimeout']
-                       : 60; // some sane number
+                       : 60; // some sane amount
                $this->safeDelay = isset( $config['safeDelay'] )
                        ? $config['safeDelay']
-                       : max( $this->cliTimeout, $this->webTimeout );
+                       : max( $this->cliTimeout, $this->webTimeout ); // cover 
worst case
+               if ( isset( $config['cache'] ) && $config['cache'] instanceof 
BagOStuff ) {
+                       $this->statusCache = $config['cache'];
+                       $this->trustCache = ( !empty( $config['trustCache'] ) 
&& $this->safeDelay > 0 );
+               } else {
+                       $this->statusCache = null;
+                       $this->trustCache = false;
+               }
        }
 
-       function doLock( array $keys, $type ) {
+       protected function doLock( array $keys, $type ) {
                $status = Status::newGood();
 
                $keysToLock = array();
@@ -310,10 +317,6 @@
                                $status->warning( 'lockmanager-alreadylocked', 
$key );
                        } else {
                                $bucket = $this->getBucketFromKey( $key );
-                               if ( !$bucket ) { // config error?
-                                       $status->fatal( 
'lockmanager-fail-config', $key );
-                                       return $status;
-                               }
                                $keysToLock[$bucket][] = $key;
                        }
                }
@@ -325,19 +328,19 @@
                        // (a) First server is up; common case
                        // (b) First server is down but a peer is up
                        // (c) First server is down and no peer are up (or none 
defined)
-                       $count = $this->doLockingQueryAll( $bucket, $keys, 
$type );
-                       if ( $count == -1 ) {
+                       $res = $this->doLockingQueryAll( $bucket, $keys, $type 
);
+                       if ( $res === 'cantacquire' ) {
                                // Resources already locked by another process.
                                // Abort and unlock everything we just locked.
                                $status->fatal( 'lockmanager-fail-acquirelocks' 
);
                                $status->merge( $this->doUnlock( $lockedKeys, 
$type ) );
                                return $status;
-                       } elseif ( $count <= 0 ) {
+                       } elseif ( $res !== true ) {
                                // Couldn't contact any servers for this bucket.
                                // Abort and unlock everything we just locked.
                                $status->fatal( 'lockmanager-fail-db-bucket', 
$bucket );
                                $status->merge( $this->doUnlock( $lockedKeys, 
$type ) );
-                               return $status; // error
+                               return $status;
                        }
                        // Record these locks as active
                        foreach ( $keys as $key ) {
@@ -350,7 +353,7 @@
                return $status;
        }
 
-       function doUnlock( array $keys, $type ) {
+       protected function doUnlock( array $keys, $type ) {
                $status = Status::newGood();
 
                foreach ( $keys as $key ) {
@@ -424,39 +427,50 @@
        }
 
        /**
-        * Attept to acquire a lock on the primary server as well
-        * as all peer servers for a bucket. Return value is either:
-        *    a) The number of servers, considered reliable, where the locks 
were acquired
-        *    b) -1; if any server claimed that a resource was already locked
+        * Attempt to acquire locks with the peers for a bucket.
         * This should avoid throwing any exceptions.
         *
         * @param $bucket integer
         * @param $keys Array List of resource keys to lock
         * @param $type integer FileLockManager::LOCK_EX or 
FileLockManager::LOCK_SH
-        * @return integer
+        * @return bool|string One of (true, 'cantacquire', 'dberrors')
         */
        protected function doLockingQueryAll( $bucket, array $keys, $type ) {
-               $locksMade = 0; // locks made on trustable servers
+               $yesVotes = 0; // locks made on trustable servers
+               $votesLeft = count( $this->dbsByBucket[$bucket] ); // remaining 
servers
+               $quorum = floor( $votesLeft/2 + 1 ); // simple majority
                foreach ( $this->dbsByBucket[$bucket] as $server ) {
-                       try {
-                               $this->doLockingQuery( $server, $keys, $type );
-                               // Servers that have any signs of lock loss are 
treated as suspect
-                               if ( $this->checkReliable( $server ) ) {
-                                       ++$locksMade; // success for this peer
-                               } elseif ( !$this->statusCache ) {
-                                       // If we are only checking for 
restarts, this won't catch
-                                       // cases were are only server got a 
lock and was restarted.
-                                       return 0;
+                       // Check that server is not *known* to have issues
+                       if ( $this->checkStatusCache( $server ) ) {
+                               try {
+                                       // Attempt to acquire the lock on this 
server
+                                       $this->doLockingQuery( $server, $keys, 
$type );
+                                       // Check that server has no signs of 
lock loss
+                                       if ( $this->checkUptime( $server ) ) {
+                                               ++$yesVotes; // success for 
this peer
+                                               if ( $yesVotes >= $quorum ) {
+                                                       return true; // lock 
obtained
+                                               }
+                                       }
+                               } catch ( DBError $e ) {
+                                       if ( $this->lastErrorIndicatesLocked( 
$server ) ) {
+                                               return 'cantacquire'; // 
vetoed; resource locked
+                                       } else { // can't connect?
+                                               $this->cacheRecordFailure( 
$server );
+                                       }
                                }
-                       } catch ( DBError $e ) {
-                               if ( $this->lastErrorIndicatesLocked( $server ) 
) {
-                                       return -1; // resource locked
-                               } else { // can't connect?
-                                       $this->recordFailure( $server );
-                               }
                        }
+                       $votesLeft--;
+                       $votesNeeded = $quorum - $yesVotes;
+                       if ( $votesNeeded > $votesLeft ) {
+                               break; // short-circuit
+                       }
                }
-               return $locksMade;
+               // At this point, we must not have meet the quorum
+               if ( $yesVotes > 0 && $this->trustCache ) {
+                       return true; // we are trusting the cache; may 
comprimise correctness
+               }
+               return 'dberrors'; // not enough votes to ensure correctness
        }
 
        /**
@@ -493,29 +507,19 @@
        }
 
        /**
-        * Checks if none of the following happened:
-        * a) The DB server recently restarted.
-        *    This curtails the problem of locks falling off when servers 
restart.
-        * b) The DB server has recently missed lock queries.
-        *    This curtails the problem of peers occasionally not getting locks.
+        * Checks if the DB server did not recently restart.
+        * This curtails the problem of locks falling off when servers restart.
         * 
         * @param $server string
         * @return bool
         */
-       protected function checkReliable( $server ) {
+       protected function checkUptime( $server ) {
                if ( isset( $this->activeConns[$server] ) ) { // sanity
                        if ( $this->safeDelay > 0 ) {
                                $db = $this->activeConns[$server];
                                if ( $db->getServerUptime() < $this->safeDelay 
) {
                                        return false;
                                }
-                               if ( $this->statusCache ) {
-                                       $key = $this->getMissKey( $server );
-                                       $misses = $this->statusCache->get( $key 
);
-                                       if ( $misses > 0 ) {
-                                               return false;
-                                       }
-                               }
                        }
                        return true;
                }
@@ -523,7 +527,25 @@
        }
 
        /**
-        * Log a lock request failure to the log server.
+        * Checks if the DB server has not recently missed lock queries.
+        * This curtails the problem of peers occasionally not getting locks.
+        * 
+        * @param $server string
+        * @return bool
+        */
+       protected function checkStatusCache( $server ) {
+               if ( $this->statusCache && $this->safeDelay > 0 ) {
+                       $key = $this->getMissKey( $server );
+                       $misses = $this->statusCache->get( $key );
+                       if ( $misses > 0 ) {
+                               return false;
+                       }
+               }
+               return true;
+       }
+
+       /**
+        * Log a lock request failure to the cache.
         *
         * Worst case scenario is that a resource lock was only
         * on one peer and then that peer is restarted or goes down.
@@ -532,7 +554,7 @@
         * @param $server string
         * @return bool Success
         */
-       protected function recordFailure( $server ) {
+       protected function cacheRecordFailure( $server ) {
                if ( $this->statusCache && $this->safeDelay > 0 ) {
                        $key = $this->getMissKey( $server );
                        $misses = $this->statusCache->get( $key );
@@ -574,11 +596,11 @@
 class NullFileLockManager extends FileLockManager {
        function __construct( array $config ) {}
 
-       function doLock( array $keys, $type ) {
+       protected function doLock( array $keys, $type ) {
                return Status::newGood();
        }
 
-       function doUnlock( array $keys, $type ) {
+       protected function doUnlock( array $keys, $type ) {
                return Status::newGood();
        }
 }


_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to