coren has uploaded a new change for review. https://gerrit.wikimedia.org/r/224064
Change subject: Labs: Script to back labstore filesystems up ...................................................................... Labs: Script to back labstore filesystems up This will create the snapshot as needed to make a time- consistent copy. TODO: clean snapshots up as free space is becoming low or they are becoming full. A possibility to consider is to have the script /also/ create a safety snapshot at the destination before starting the rsync, but it's not clear if introducing more knowledge of the destination layout here is wise. Bug: T105027 Change-Id: I078179f84a323957a4124f502aea3073d5c993b5 --- A modules/labstore/files/replication-rsync.conf A modules/labstore/files/storage-replicate M modules/labstore/manifests/fileserver.pp 3 files changed, 196 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/64/224064/1 diff --git a/modules/labstore/files/replication-rsync.conf b/modules/labstore/files/replication-rsync.conf new file mode 100644 index 0000000..fb94d9a --- /dev/null +++ b/modules/labstore/files/replication-rsync.conf @@ -0,0 +1,10 @@ +# Do not back log and default output files up +# (they tend to grow a lot, and are not valuable +# enough to keep for DR purposes) +- /tools/**/*.log +- /tools/**/*.err +- /tools/**/*.out +# Not relevant to rsync +- /lost+found +# Allow endusers to filter their own backups, too +: .nobackup diff --git a/modules/labstore/files/storage-replicate b/modules/labstore/files/storage-replicate new file mode 100755 index 0000000..7989461 --- /dev/null +++ b/modules/labstore/files/storage-replicate @@ -0,0 +1,169 @@ +#! /usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright © 2015 Marc-André Pelletier <[email protected]> +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. 
+# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +# +## +## THIS FILE IS MANAGED BY PUPPET +## +## Source: modules/labstore/storage-replicate +## From: tbd +## + +## +## storage-replicate +## +## usage: storage-replicate <mountpoint> <dest> +## +## Replicates the directory at <mountpoint> (which must have a +## volume mounted) to the destination specified +## by <dest>. The actual copy is done with rsync, so any value +## acceptable to rsync as destination can be used here. A +## snapshot will be taken, but not discarded at the end. +## +## This script provides for locking to avoid more than one +## replication taking place at a time and making a mess of things. +## The presence of a file named 'skipped' in the $lockdir means +## that the replication is running late, with each line in the +## file being a skipped attempt because a prior rsync was already +## taking place. It may be wise to make an icinga check for +## the presence and number of lines in this file. +## +## This script is intended to be run (daily?) by cron, but it +## is safe to invoke manually - it will do nothing if there is +## already an rsync in progress. 
##

import argparse
import re
import datetime
import subprocess
import sys
import logging
import logging.handlers
import os


def system(*cmd):
    """Run a command (argv list, no shell) and capture its output.

    Returns a pair ``(out, err)``:

    * ``(stdout, None)`` when the command exits with status 0;
    * ``(None, message)`` otherwise, where ``message`` is the first line
      of the command's stderr or, when stderr is empty, a description of
      the exit status or of the signal that killed it.
    """
    sub = subprocess.Popen(
        list(cmd),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Text-mode pipes so callers can treat the output as str
        universal_newlines=True)
    (out, err) = sub.communicate()
    if sub.returncode:
        # A failing command may write nothing at all to stderr; indexing
        # the first line unconditionally would raise IndexError.
        lines = err.splitlines()
        err = lines[0].strip() if lines else ''
        if not err:
            if sub.returncode < 0:
                err = "killed by signal %d" % -sub.returncode
            else:
                err = "exited with %d" % sub.returncode
        return (None, err)
    return (out, None)


class Lockdir(object):
    """Directory-based lock, usable as a context manager.

    Entering creates ``path`` (the lock) and ``path/snapshot`` (a
    mountpoint).  os.mkdir() fails if the directory already exists, which
    is what makes it usable as a lock.  On failure ``err`` holds a message
    and the caller is expected to check it; no exception is raised.

    Leaving unmounts the mountpoint and removes the lock directory, but
    only if this instance created it: a run that found the lock already
    taken must not tear down the lock of the run that holds it.
    """

    def __init__(self, path):
        self.path = path
        self.mountpoint = "%s/snapshot" % path
        self.err = None
        # True once this instance has created self.path itself
        self.acquired = False

    def __enter__(self):
        try:
            os.mkdir(self.path, 0o700)
            self.acquired = True
            os.mkdir(self.mountpoint, 0o700)
        except OSError as e:
            self.err = "unable to create lock directory %s: %s" % (self.path, e.strerror)
        return self

    def __exit__(self, e1, e2, e3):
        if self.acquired:
            # Best-effort cleanup; failures here are deliberately ignored
            # (the mountpoint may never have been mounted).
            system('/bin/umount', '-fl', self.mountpoint)
            system('/bin/rm', '-rf', self.path)
        # Never swallow the exception (including SystemExit)
        return None


syslog = logging.getLogger('storage-replicate')
# Without an explicit level the logger inherits WARNING from the root
# logger and the info() progress messages would be discarded.
syslog.setLevel(logging.INFO)
handler = logging.handlers.SysLogHandler(address='/dev/log')
handler.setFormatter(logging.Formatter('%(name)s: %(levelname)s: %(message)s'))
syslog.addHandler(handler)

parser = argparse.ArgumentParser()
parser.add_argument('path', help='Path to the mountpoint to replicate')
parser.add_argument('dest', help='rsync-syntax destination for the replica')
args = parser.parse_args()

# Find the specified path in /proc/mounts, matching only logical volumes
# and extract the volume group and name from the device entry
volgroup = volname = None   # stay None when no mount entry matches
with open('/proc/mounts', 'r') as procmounts:
    for line in procmounts:
        match = re.match(r"/dev/mapper/([^-]+)-(\S+)\s+(\S+)\s", line)
        if match and match.group(3) == args.path:
            volgroup, volname = match.group(1, 2)

if not (volgroup and volname):
    syslog.error("%s is not a LVM volume mountpoint" % args.path)
    sys.exit(2)

# Now check that the specified volume has the correct
attributes +(out, err) = system('/sbin/lvs', '--noheadings', '-o', 'lv_attr', '/dev/mapper/%s-%s' % (volgroup, volname)) +if err: + syslog.error("/sbin/lvs: " + err) + sys.exit(2) + +# Must be: not (s)napshot, (-) not mirror, and (a)ctive +if not re.match("[^s]..-a...", out.strip()): + syslog.error("%s-%s is not a suitable volume for replication" % (volgroup, volname)) + sys.exit(2) + +snapshot = volname + datetime.datetime.utcnow().strftime("%Y%m%d")) + +lockdir = '/var/run/lock/storage-replicate-%s-%s' % (volgroup, volname) +with Lockdir(lockdir) as lock: + + if lock.err: + # The lock directory already exists, so the previous + # rsync is running long. Log the event, and exit. + try: + with open('%s/started' % lockdir, 'r') as f: + when = f.readline().strip() + except IOError as e: + when = 'some time ago? (no start time file: %s)' % e.strerror + syslog.warning("Skipping replication; already in progress since %s" % when) + sys.exit(0) + + with open('%s/started' % lockdir, 'w+') as f: + f.write(datetime.datetime.utcnow().strftime("%Y-%m-%d% H%:M\n")) + + (out, err) = system( + '/sbin/lvcreate', '-L', '1T', '-s', '-n', snapshot, '%s/%s' % (volgroup, volname)) + if err: + syslog.critical('unable to create %s-%s: %s' % (volgroup, snapshot, err)) + sys.exit(1) + + (out, err) = system( + '/bin/mount', '-oro,noload', + '/dev/mapper/%s-%s' % (volgroup, snapshot), + lock.mountpoint) + if err: + syslog.critical('unable to mount %s-%s: %s' % (volgroup, snapshot, err)) + sys.exit(1) + + syslog.info("Replication of %s-%s starting" % (volgroup, snapshot)) + + (out, err) = system( + '/usr/bin/ionice', '-c', 'Idle', + '/usr/bin/rsync', '-auHAXs', '--delete-during', + '-e', 'ssh -i /root/.ssh/id_labstore', + '--inplace', '--append-verify', '-f', '._/etc/replication-rsync.conf', + '%s/.' 
% lock.mountpoint, args.dest) + if err: + syslog.critical('rsync failed: %s' % err) + exit(1) + + syslog.info("Replication of %s-%s complete" % (volgroup, snapshot)) + diff --git a/modules/labstore/manifests/fileserver.pp b/modules/labstore/manifests/fileserver.pp index cec9fbc..7d562ed 100644 --- a/modules/labstore/manifests/fileserver.pp +++ b/modules/labstore/manifests/fileserver.pp @@ -17,11 +17,26 @@ } file { '/etc/init/replica-addusers.conf': - source => 'puppet:///modules/labstore/replica-addusers.conf', + source => 'puppet:///modules/labstore/replica-addusers.conf', + owner => 'root', + group => 'root', + mode => '0444', + require => File['/usr/local/sbin/replica-addusers.pl'], + } + + file { '/etc/replication-rsync.conf': + source => 'puppet:///modules/labstore/replication-rsync.conf', owner => 'root', group => 'root', mode => '0444', - require => File['/usr/local/sbin/replica-addusers.pl'], + } + + file { '/usr/local/sbin/storage-replicate': + source => 'puppet:///modules/labstore/storage-replicate', + owner => 'root', + group => 'root', + mode => '0444', + require => File['/etc/replication-rsync.conf'], } # There is no service {} stanza on purpose -- this service -- To view, visit https://gerrit.wikimedia.org/r/224064 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I078179f84a323957a4124f502aea3073d5c993b5 Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: coren <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
