BryanDavis has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/224313

Change subject: [WIP] Sync /srv/mediawiki-staging to co-masters
......................................................................

[WIP] Sync /srv/mediawiki-staging to co-masters

Add a sync step, run before the rsync proxies are updated, that copies the
full contents of /srv/mediawiki-staging from the current master server to
the configured co-master servers. This rsync copy includes the full git
data directories.

Bug: T104826
Change-Id: I3d2b4e7495d75540c914b2eb999124ad1ee6f8b0
---
A bin/sync-master
M scap.cfg
M scap/__init__.py
M scap/cli.py
M scap/config.py
M scap/main.py
M scap/tasks.py
M scap/utils.py
8 files changed, 141 insertions(+), 27 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/tools/scap refs/changes/13/224313/1

diff --git a/bin/sync-master b/bin/sync-master
new file mode 100755
index 0000000..7f0ed5d
--- /dev/null
+++ b/bin/sync-master
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Sync /srv/mediawiki-staging with full git data
+#
+# Copyright © 2015 Wikimedia Foundation and contributors
+
+import os
+import sys
+
+# Add scap package to search path
+script = os.path.realpath(sys.argv[0])
+scap_src = os.path.dirname(os.path.dirname(script))
+sys.path.append(scap_src)
+
+import scap
+scap.SyncMaster.run()
diff --git a/scap.cfg b/scap.cfg
index 57a4ca7..b01fedc 100644
--- a/scap.cfg
+++ b/scap.cfg
@@ -73,6 +73,8 @@
 # Local interface that indicates that pybal is in use
 pybal_interface: lo:LVS
 
+# DSH group naming hosts to use as scap masters
+dsh_masters: scap-masters
 # DSH group naming hosts to use as scap proxies
 dsh_proxies: scap-proxies
 # DSH group naming hosts to use as scap targets
diff --git a/scap/__init__.py b/scap/__init__.py
index 84af164..f9802bc 100644
--- a/scap/__init__.py
+++ b/scap/__init__.py
@@ -19,6 +19,7 @@
     SyncDir,
     SyncDocroot,
     SyncFile,
+    SyncMaster,
     SyncWikiversions,
     UpdateL10n,
 )
@@ -36,6 +37,7 @@
     'SyncDir',
     'SyncDocroot',
     'SyncFile',
+    'SyncMaster',
     'SyncWikiversions',
     'UpdateL10n',
 )
@@ -53,5 +55,6 @@
     SyncDir,
     SyncDocroot,
     SyncFile,
+    SyncMaster,
     SyncWikiversions,
     UpdateL10n))  # Ignore unused import warning
diff --git a/scap/cli.py b/scap/cli.py
index 241ec3f..7ffc950 100644
--- a/scap/cli.py
+++ b/scap/cli.py
@@ -6,9 +6,6 @@
 
 """
 import argparse
-import collections
-import distutils.version
-import json
 import logging
 import os
 import sys
@@ -90,26 +87,9 @@
         :returns: collections.OrderedDict of {version:wikidb} values sorted by
                   version number in ascending order
         """
-        directory = self.config[source_tree + '_dir']
-        path = utils.get_realm_specific_filename(
-            os.path.join(directory, 'wikiversions.json'),
+        return utils.get_active_wikiversions(
+            self.config[source_tree + '_dir'],
             self.config['wmf_realm'], self.config['datacenter'])
-
-        with open(path) as f:
-            wikiversions = json.load(f)
-
-        versions = {}
-        for wikidb, version in wikiversions.items():
-            version = version[4:]  # trim 'php-' from version
-            if version not in versions:
-                versions[version] = wikidb
-
-        # Convert to list of (version, db) tuples sorted by version number
-        # and then convert that list to an OrderedDict
-        sorted_versions = collections.OrderedDict(sorted(versions.iteritems(),
-            key=lambda v: distutils.version.LooseVersion(v[0])))
-
-        return sorted_versions
 
     def _parse_arguments(self, argv):
         """Parse command line arguments.
diff --git a/scap/config.py b/scap/config.py
index b4a4348..07ff76e 100644
--- a/scap/config.py
+++ b/scap/config.py
@@ -30,6 +30,8 @@
     'apache_pid_file': '/var/run/apache2/apache2.pid',
     'pybal_interface': 'lo:LVS',
     'dsh_targets': 'mediawiki-installation',
+    'dsh_masters': 'scap-masters',
+    'dsh_proxies': 'scap-proxies',
 }
 
 
diff --git a/scap/main.py b/scap/main.py
index 70402b0..058a8d2 100644
--- a/scap/main.py
+++ b/scap/main.py
@@ -11,6 +11,7 @@
 import netifaces
 import os
 import psutil
+import socket
 import subprocess
 
 from . import cli
@@ -39,6 +40,19 @@
 
         with utils.lock(self.config['lock_file']):
             self._before_cluster_sync()
+
+            # Update masters
+            masters = self._get_master_list()
+            with log.Timer('sync-masters', self.get_stats()):
+                update_masters = ssh.Job(masters, user=self.config['ssh_user'])
+                update_masters.exclude_hosts([socket.getfqdn()])
+                update_masters.command(self._master_sync_command())
+                update_masters.progress('sync-masters')
+                succeeded, failed = update_masters.run()
+                if failed:
+                    self.get_logger().warning(
+                        '%d masters had sync errors', failed)
+                    self.soft_errors = True
 
             # Update proxies
             proxies = self._get_proxy_list()
@@ -77,10 +91,23 @@
     def _before_cluster_sync(self):
         pass
 
+    def _get_master_list(self):
+        """Get list of deploy master hostnames that should be updated before
+        the rest of the cluster."""
+        return utils.read_dsh_hosts_file(self.config['dsh_masters'])
+
     def _get_proxy_list(self):
         """Get list of sync proxy hostnames that should be updated before the
         rest of the cluster."""
         return utils.read_dsh_hosts_file(self.config['dsh_proxies'])
+
+    def _master_sync_command(self):
+        """Synchronization command to run on the master hosts."""
+        cmd = [self.get_script_path('sync-master')]
+        if self.verbose:
+            cmd.append('--verbose')
+        cmd.append(socket.getfqdn())
+        return cmd
 
     def _proxy_sync_command(self):
         """Synchronization command to run on the proxy hosts."""
@@ -190,6 +217,7 @@
     #. Compile wikiversions.json to cdb in deploy directory
     #. Update l10n files in staging area
     #. Compute git version information
+    #. Ask scap masters to sync with current master
     #. Ask scap proxies to sync with master server
     #. Ask apaches to sync with fastest rsync server
     #. Ask apaches to rebuild l10n CDB files
@@ -288,6 +316,19 @@
         return exit_status
 
 
+class SyncMaster(cli.Application):
+    """Sync local MediaWiki staging directory with deploy server state."""
+
+    @cli.argument('master', help='Master rsync server to copy from')
+    def main(self, *extra_args):
+        tasks.sync_master(
+            self.config,
+            master=self.arguments.master,
+            verbose=self.verbose
+        )
+        return 0
+
+
 class SyncCommon(cli.Application):
     """Sync local MediaWiki deployment directory with deploy server state."""
 
diff --git a/scap/tasks.py b/scap/tasks.py
index 2fa6cea..874074a 100644
--- a/scap/tasks.py
+++ b/scap/tasks.py
@@ -29,9 +29,6 @@
     '--delay-updates',
     '--compress',
     '--delete',
-    '--exclude=**/.svn/lock',
-    '--exclude=**/.git/objects',
-    '--exclude=**/.git/**/objects',
     '--exclude=**/cache/l10n/*.cdb',
     '--no-perms',
 ]
@@ -239,6 +236,47 @@
     purge.progress('l10n purge').run()
 
 
+def sync_master(cfg, master, verbose=False):
+    """Sync local staging dir with upstream rsync server's copy
+
+    Rsync from ``master::common`` to the local staging directory.
+
+    :param cfg: Dict of global configuration values.
+    :param master: Master server to sync with
+    :param verbose: Enable verbose logging?
+    """
+    logger = logging.getLogger('sync_master')
+
+    if not os.path.isdir(cfg['stage_dir']):
+        raise Exception((
+            'rsync target directory %s not found. Ask root to create it '
+            '(should belong to root:wikidev).') % cfg['stage_dir'])
+
+    # Execute rsync fetch locally via sudo
+    rsync = ['sudo', '-u', 'mwdeploy', '-g', 'wikidev', '-n', '--']
+    rsync.extend(DEFAULT_RSYNC_ARGS)
+    if verbose:
+        rsync.append('--verbose')
+
+    rsync.append('%s::common' % master)
+    rsync.append(cfg['stage_dir'])
+
+    logger.info('Copying to %s from %s', socket.getfqdn(), master)
+    logger.debug('Running rsync command: `%s`', ' '.join(rsync))
+    stats = log.Stats(cfg['statsd_host'], int(cfg['statsd_port']))
+    with log.Timer('rsync master', stats):
+        subprocess.check_call(rsync)
+
+    # Rebuild the CDB files from the JSON versions
+    versions = utils.get_active_wikiversions(
+        cfg['stage_dir'], cfg['wmf_realm'], cfg['datacenter'])
+    use_cores = max(multiprocessing.cpu_count() - 2, 1)
+    for version, wikidb in versions.items():
+        cache_dir = os.path.join(cfg['stage_dir'],
+            'php-%s' % version, 'cache', 'l10n')
+        merge_cdb_updates(cache_dir, use_cores, True, True)
+
+
 def sync_common(cfg, include=None, sync_from=None, verbose=False):
     """Sync local deploy dir with upstream rsync server's copy
 
@@ -271,6 +309,8 @@
 
     # Execute rsync fetch locally via sudo
     rsync = ['sudo', '-u', 'mwdeploy', '-n', '--'] + DEFAULT_RSYNC_ARGS
+    # Exclude .git metadata
+    rsync.append('--exclude=**/.git')
     if verbose:
         rsync.append('--verbose')
 
diff --git a/scap/utils.py b/scap/utils.py
index ff1a31d..605dbdf 100644
--- a/scap/utils.py
+++ b/scap/utils.py
@@ -5,7 +5,9 @@
     Contains misc utility functions.
 
 """
+import collections
 import contextlib
+import distutils.version
 import errno
 import fcntl
 import hashlib
@@ -274,13 +276,14 @@
     return crc.hexdigest()
 
 
-def read_dsh_hosts_file(path):
+def read_dsh_hosts_file(group):
     """Reads hosts from a file into a list.
 
     Blank lines and comments are ignored.
     """
+    path = os.path.join('/etc/dsh/group', group)
     try:
-        with open(os.path.join('/etc/dsh/group', path)) as hosts_file:
+        with open(path) as hosts_file:
             return re.findall(r'^[\w\.\-]+', hosts_file.read(), re.MULTILINE)
     except IOError as e:
         raise IOError(e.errno, e.strerror, path)
@@ -464,3 +467,29 @@
         return int(open(path).read().strip())
     except IOError as e:
         raise IOError(e.errno, e.strerror, path)
+
+
+def get_active_wikiversions(directory, realm, datacenter):
+    """Get an ordered collection of active MediaWiki versions.
+
+    :returns: collections.OrderedDict of {version:wikidb} values sorted by
+                version number in ascending order
+    """
+    path = get_realm_specific_filename(
+        os.path.join(directory, 'wikiversions.json'), realm, datacenter)
+
+    with open(path) as f:
+        wikiversions = json.load(f)
+
+    versions = {}
+    for wikidb, version in wikiversions.items():
+        version = version[4:]  # trim 'php-' from version
+        if version not in versions:
+            versions[version] = wikidb
+
+    # Convert to list of (version, db) tuples sorted by version number
+    # and then convert that list to an OrderedDict
+    sorted_versions = collections.OrderedDict(sorted(versions.iteritems(),
+        key=lambda v: distutils.version.LooseVersion(v[0])))
+
+    return sorted_versions

-- 
To view, visit https://gerrit.wikimedia.org/r/224313
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I3d2b4e7495d75540c914b2eb999124ad1ee6f8b0
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/tools/scap
Gerrit-Branch: master
Gerrit-Owner: BryanDavis <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to