BryanDavis has uploaded a new change for review.
https://gerrit.wikimedia.org/r/224313
Change subject: [WIP] Sync /srv/mediawiki-staging to co-masters
......................................................................
[WIP] Sync /srv/mediawiki-staging to co-masters
Add a sync step before updating rsync proxies that will sync the full
contents of /srv/mediawiki-staging with configured co-master servers.
This rsync copy includes the full git data directories from the current
master server.
Bug: T104826
Change-Id: I3d2b4e7495d75540c914b2eb999124ad1ee6f8b0
---
A bin/sync-master
M scap.cfg
M scap/__init__.py
M scap/cli.py
M scap/config.py
M scap/main.py
M scap/tasks.py
M scap/utils.py
8 files changed, 141 insertions(+), 27 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/tools/scap
refs/changes/13/224313/1
diff --git a/bin/sync-master b/bin/sync-master
new file mode 100755
index 0000000..7f0ed5d
--- /dev/null
+++ b/bin/sync-master
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Sync /srv/mediawiki-staging with full git data
+#
+# Copyright © 2015 Wikimedia Foundation and contributors
+
+import os
+import sys
+
+# Add scap package to search path
+script = os.path.realpath(sys.argv[0])
+scap_src = os.path.dirname(os.path.dirname(script))
+sys.path.append(scap_src)
+
+import scap
+scap.SyncMaster.run()
diff --git a/scap.cfg b/scap.cfg
index 57a4ca7..b01fedc 100644
--- a/scap.cfg
+++ b/scap.cfg
@@ -73,6 +73,8 @@
# Local interface that indicates that pybal is in use
pybal_interface: lo:LVS
+# DSH group naming hosts to use as scap masters
+dsh_masters: scap-masters
# DSH group naming hosts to use as scap proxies
dsh_proxies: scap-proxies
# DSH group naming hosts to use as scap targets
diff --git a/scap/__init__.py b/scap/__init__.py
index 84af164..f9802bc 100644
--- a/scap/__init__.py
+++ b/scap/__init__.py
@@ -19,6 +19,7 @@
SyncDir,
SyncDocroot,
SyncFile,
+ SyncMaster,
SyncWikiversions,
UpdateL10n,
)
@@ -36,6 +37,7 @@
'SyncDir',
'SyncDocroot',
'SyncFile',
+ 'SyncMaster',
'SyncWikiversions',
'UpdateL10n',
)
@@ -53,5 +55,6 @@
SyncDir,
SyncDocroot,
SyncFile,
+ SyncMaster,
SyncWikiversions,
UpdateL10n)) # Ignore unused import warning
diff --git a/scap/cli.py b/scap/cli.py
index 241ec3f..7ffc950 100644
--- a/scap/cli.py
+++ b/scap/cli.py
@@ -6,9 +6,6 @@
"""
import argparse
-import collections
-import distutils.version
-import json
import logging
import os
import sys
@@ -90,26 +87,9 @@
:returns: collections.OrderedDict of {version:wikidb} values sorted by
version number in ascending order
"""
- directory = self.config[source_tree + '_dir']
- path = utils.get_realm_specific_filename(
- os.path.join(directory, 'wikiversions.json'),
+ return utils.get_active_wikiversions(
+ self.config[source_tree + '_dir'],
self.config['wmf_realm'], self.config['datacenter'])
-
- with open(path) as f:
- wikiversions = json.load(f)
-
- versions = {}
- for wikidb, version in wikiversions.items():
- version = version[4:] # trim 'php-' from version
- if version not in versions:
- versions[version] = wikidb
-
- # Convert to list of (version, db) tuples sorted by version number
- # and then convert that list to an OrderedDict
- sorted_versions = collections.OrderedDict(sorted(versions.iteritems(),
- key=lambda v: distutils.version.LooseVersion(v[0])))
-
- return sorted_versions
def _parse_arguments(self, argv):
"""Parse command line arguments.
diff --git a/scap/config.py b/scap/config.py
index b4a4348..07ff76e 100644
--- a/scap/config.py
+++ b/scap/config.py
@@ -30,6 +30,8 @@
'apache_pid_file': '/var/run/apache2/apache2.pid',
'pybal_interface': 'lo:LVS',
'dsh_targets': 'mediawiki-installation',
+ 'dsh_masters': 'scap-masters',
+ 'dsh_proxies': 'scap-proxies',
}
diff --git a/scap/main.py b/scap/main.py
index 70402b0..058a8d2 100644
--- a/scap/main.py
+++ b/scap/main.py
@@ -11,6 +11,7 @@
import netifaces
import os
import psutil
+import socket
import subprocess
from . import cli
@@ -39,6 +40,19 @@
with utils.lock(self.config['lock_file']):
self._before_cluster_sync()
+
+ # Update masters
+ masters = self._get_master_list()
+ with log.Timer('sync-masters', self.get_stats()):
+ update_masters = ssh.Job(masters, user=self.config['ssh_user'])
+ update_masters.exclude_hosts([socket.getfqdn()])
+ update_masters.command(self._master_sync_command())
+ update_masters.progress('sync-masters')
+ succeeded, failed = update_masters.run()
+ if failed:
+ self.get_logger().warning(
+ '%d masters had sync errors', failed)
+ self.soft_errors = True
# Update proxies
proxies = self._get_proxy_list()
@@ -77,10 +91,23 @@
def _before_cluster_sync(self):
pass
+ def _get_master_list(self):
+ """Get list of deploy master hostnames that should be updated before
+ the rest of the cluster."""
+ return utils.read_dsh_hosts_file(self.config['dsh_masters'])
+
def _get_proxy_list(self):
"""Get list of sync proxy hostnames that should be updated before the
rest of the cluster."""
return utils.read_dsh_hosts_file(self.config['dsh_proxies'])
+
+ def _master_sync_command(self):
+ """Synchronization command to run on the master hosts."""
+ cmd = [self.get_script_path('sync-master')]
+ if self.verbose:
+ cmd.append('--verbose')
+ cmd.append(socket.getfqdn())
+ return cmd
def _proxy_sync_command(self):
"""Synchronization command to run on the proxy hosts."""
@@ -190,6 +217,7 @@
#. Compile wikiversions.json to cdb in deploy directory
#. Update l10n files in staging area
#. Compute git version information
+ #. Ask scap masters to sync with current master
#. Ask scap proxies to sync with master server
#. Ask apaches to sync with fastest rsync server
#. Ask apaches to rebuild l10n CDB files
@@ -288,6 +316,19 @@
return exit_status
+class SyncMaster(cli.Application):
+ """Sync local MediaWiki staging directory with deploy server state."""
+
+ @cli.argument('master', help='Master rsync server to copy from')
+ def main(self, *extra_args):
+ tasks.sync_master(
+ self.config,
+ master=self.arguments.master,
+ verbose=self.verbose
+ )
+ return 0
+
+
class SyncCommon(cli.Application):
"""Sync local MediaWiki deployment directory with deploy server state."""
diff --git a/scap/tasks.py b/scap/tasks.py
index 2fa6cea..874074a 100644
--- a/scap/tasks.py
+++ b/scap/tasks.py
@@ -29,9 +29,6 @@
'--delay-updates',
'--compress',
'--delete',
- '--exclude=**/.svn/lock',
- '--exclude=**/.git/objects',
- '--exclude=**/.git/**/objects',
'--exclude=**/cache/l10n/*.cdb',
'--no-perms',
]
@@ -239,6 +236,47 @@
purge.progress('l10n purge').run()
+def sync_master(cfg, master, verbose=False):
+ """Sync local staging dir with upstream rsync server's copy
+
+ Rsync from ``master::common`` to the local staging directory.
+
+ :param cfg: Dict of global configuration values.
+ :param master: Master server to sync with
+ :param verbose: Enable verbose logging?
+ """
+ logger = logging.getLogger('sync_master')
+
+ if not os.path.isdir(cfg['stage_dir']):
+ raise Exception((
+ 'rsync target directory %s not found. Ask root to create it '
+ '(should belong to root:wikidev).') % cfg['stage_dir'])
+
+ # Execute rsync fetch locally via sudo
+ rsync = ['sudo', '-u', 'mwdeploy', '-g', 'wikidev', '-n', '--']
+ rsync.extend(DEFAULT_RSYNC_ARGS)
+ if verbose:
+ rsync.append('--verbose')
+
+ rsync.append('%s::common' % master)
+ rsync.append(cfg['stage_dir'])
+
+ logger.info('Copying to %s from %s', socket.getfqdn(), master)
+ logger.debug('Running rsync command: `%s`', ' '.join(rsync))
+ stats = log.Stats(cfg['statsd_host'], int(cfg['statsd_port']))
+ with log.Timer('rsync master', stats):
+ subprocess.check_call(rsync)
+
+ # Rebuild the CDB files from the JSON versions
+ versions = utils.get_active_wikiversions(
+ cfg['stage_dir'], cfg['wmf_realm'], cfg['datacenter'])
+ use_cores = max(multiprocessing.cpu_count() - 2, 1)
+ for version, wikidb in versions.items():
+ cache_dir = os.path.join(cfg['stage_dir'],
+ 'php-%s' % version, 'cache', 'l10n')
+ merge_cdb_updates(cache_dir, use_cores, True, True)
+
+
def sync_common(cfg, include=None, sync_from=None, verbose=False):
"""Sync local deploy dir with upstream rsync server's copy
@@ -271,6 +309,8 @@
# Execute rsync fetch locally via sudo
rsync = ['sudo', '-u', 'mwdeploy', '-n', '--'] + DEFAULT_RSYNC_ARGS
+ # Exclude .git metadata
+ rsync.append('--exclude=**/.git')
if verbose:
rsync.append('--verbose')
diff --git a/scap/utils.py b/scap/utils.py
index ff1a31d..605dbdf 100644
--- a/scap/utils.py
+++ b/scap/utils.py
@@ -5,7 +5,9 @@
Contains misc utility functions.
"""
+import collections
import contextlib
+import distutils.version
import errno
import fcntl
import hashlib
@@ -274,13 +276,14 @@
return crc.hexdigest()
-def read_dsh_hosts_file(path):
+def read_dsh_hosts_file(group):
"""Reads hosts from a file into a list.
Blank lines and comments are ignored.
"""
+ path = os.path.join('/etc/dsh/group', group)
try:
- with open(os.path.join('/etc/dsh/group', path)) as hosts_file:
+ with open(path) as hosts_file:
return re.findall(r'^[\w\.\-]+', hosts_file.read(), re.MULTILINE)
except IOError as e:
raise IOError(e.errno, e.strerror, path)
@@ -464,3 +467,29 @@
return int(open(path).read().strip())
except IOError as e:
raise IOError(e.errno, e.strerror, path)
+
+
+def get_active_wikiversions(directory, realm, datacenter):
+ """Get an ordered collection of active MediaWiki versions.
+
+ :returns: collections.OrderedDict of {version:wikidb} values sorted by
+ version number in ascending order
+ """
+ path = get_realm_specific_filename(
+ os.path.join(directory, 'wikiversions.json'), realm, datacenter)
+
+ with open(path) as f:
+ wikiversions = json.load(f)
+
+ versions = {}
+ for wikidb, version in wikiversions.items():
+ version = version[4:] # trim 'php-' from version
+ if version not in versions:
+ versions[version] = wikidb
+
+ # Convert to list of (version, db) tuples sorted by version number
+ # and then convert that list to an OrderedDict
+ sorted_versions = collections.OrderedDict(sorted(versions.iteritems(),
+ key=lambda v: distutils.version.LooseVersion(v[0])))
+
+ return sorted_versions
--
To view, visit https://gerrit.wikimedia.org/r/224313
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I3d2b4e7495d75540c914b2eb999124ad1ee6f8b0
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/tools/scap
Gerrit-Branch: master
Gerrit-Owner: BryanDavis <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits