Madhuvishy has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/334692 )
Change subject: nfs: Snapshot backup device on secondary DC before replicating latest from remote ...................................................................... nfs: Snapshot backup device on secondary DC before replicating latest from remote Bug: T149870 Change-Id: Ic485bf5bce49581a71457f9dcf9802367e3c1c6e --- A modules/labstore/files/block_sync.py D modules/labstore/files/block_sync.sh M modules/labstore/manifests/bdsync.pp M modules/labstore/manifests/device_backup.pp M modules/role/manifests/labs/nfs/secondary_backup/misc.pp M modules/role/manifests/labs/nfs/secondary_backup/tools.pp 6 files changed, 178 insertions(+), 72 deletions(-) Approvals: Madhuvishy: Looks good to me, approved Rush: Looks good to me, but someone else must approve jenkins-bot: Verified diff --git a/modules/labstore/files/block_sync.py b/modules/labstore/files/block_sync.py new file mode 100644 index 0000000..9881a3c --- /dev/null +++ b/modules/labstore/files/block_sync.py @@ -0,0 +1,141 @@ +#!/usr/bin/python3 + +import argparse +import fcntl +import os +import shlex +import subprocess +import sys + +BDSYNC = '/usr/bin/bdsync' +SNAPSHOT_MGR = '/usr/local/sbin/snapshot-manager' + + +def run_remote(cmd, r_host, r_user): + """ Run command on remote host over ssh + :param cmd: Command to run + :param r_host: Remote host to connect to + :param r_user: Remote user to run command as + :return returncode on success + :raise CalledProcessError + """ + remote_cmd = '/usr/bin/ssh -i /root/.ssh/id_labstore {}@{} "{}"'.format( + r_user, r_host, cmd) + return subprocess.check_call(shlex.split(remote_cmd)) + + +def run_local(cmd): + """ Run command locally + :param cmd: Command to run_local + :return returncode on success + :raise CalledProcessError + """ + return subprocess.check_call(shlex.split(cmd)) + + +def bdsync(local_device, r_host, r_vg, r_snapshot_name, r_user): + """ Run the block device sync from remote to local device using bdsync + :param local_device Local device to sync to + :param r_host Remote host to sync from + :param r_vg Remote volume group + :param r_snapshot_name Name of remote snapshot to sync from + :param r_user Username to run remote commands as + :return String (stdout_data) + """ + remotenice = 10 + blocksize = 16384 + server = '/usr/bin/nice -{} {} --server'.format(remotenice, BDSYNC) + remdata = '/usr/bin/ssh -i /root/.ssh/id_labstore {}@{} "{}"'.format(r_user, r_host, server) + sync_cmd = '{} --blocksize={} --remdata \'{}\' {} /dev/{}/{}' \ + .format(BDSYNC, blocksize, remdata, local_device, r_vg, r_snapshot_name) + progress_cmd = '/usr/bin/pv -p -t -e -r -a -b' + patch_cmd = '{} --patch={}'.format(BDSYNC, local_device) + sync = subprocess.Popen(shlex.split(sync_cmd), stdout=subprocess.PIPE) + progress = subprocess.Popen(shlex.split(progress_cmd), + stdin=sync.stdout, + stdout=subprocess.PIPE, + universal_newlines=True) + patch = subprocess.Popen(shlex.split(patch_cmd), stdin=progress.stdout) + patch.communicate()[0] + +if __name__ == '__main__': + + if os.geteuid() != 0: + print("Script needs to be run as root") + sys.exit(1) + + argparser = argparse.ArgumentParser() + + argparser.add_argument( + 'r_host', + help='Remote host, e.g. 10.64.37.20 or labstore1004.eqiad.wmnet' + ) + argparser.add_argument( + 'r_vg', + help='Remote volume group, e.g. misc' + ) + argparser.add_argument( + 'r_lv', + help='Remote logical volume, e.g. test' + ) + argparser.add_argument( + 'r_snapshot_name', + help='Remote snapshot name, e.g. testsnap' + ) + argparser.add_argument( + 'l_vg', + help='Volume group of local device, e.g backup' + ) + argparser.add_argument( + 'l_lv', + help='Logical volume of local device, e.g test' + ) + argparser.add_argument( + 'l_snapshot_name', + help='Local snapshot name, e.g test-backup' + ) + argparser.add_argument( + 'l_snapshot_size', + help='Local snapshot size matching lvcreate expectations e.g. [1T|10G|100m]', + default='1T', + ) + argparser.add_argument( + '--r_user', + help='Remote user to run commands over ssh as', + default='root', + ) + args = argparser.parse_args() + + local_device = '/dev/{}/{}'.format(args.l_vg, args.l_lv) + + lock_file = open('/var/lock/{}_{}_backup.lock'.format(args.r_vg, args.r_lv, 'w+')) + fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB) + + try: + try: + run_local('/bin/findmnt --notruncate -P -n -c {}'.format(local_device)) + print('Local device is mounted. Operations may be unsafe') + sys.exit(1) + except subprocess.CalledProcessError: + # Continue if the local device is not mounted + pass + + # Make sure all the executables are present on remote and local + run_remote('/usr/bin/test -e {}'.format(BDSYNC), args.r_host, args.r_user) + run_remote('/usr/bin/test -e {}'.format(SNAPSHOT_MGR), args.r_host, args.r_user) + run_local('/usr/bin/test -e {}'.format(BDSYNC)) + run_local('/usr/bin/test -e {}'.format(SNAPSHOT_MGR)) + + # Take a snapshot of the backup device on local before replicating from remote + run_local('{} create --size {} {} {}/{} --force'.format( + SNAPSHOT_MGR, + args.l_snapshot_size, args.l_snapshot_name, args.l_vg, args.l_lv)) + + # Snapshot state of remote logical volume to backup from + run_remote('{} create {} {}/{} --force'.format( + SNAPSHOT_MGR, args.r_snapshot_name, args.r_vg, args.r_lv), args.r_host, args.r_user) + + bdsync(local_device, args.r_host, args.r_vg, args.r_snapshot_name, args.r_user) + + finally: + fcntl.flock(lock_file, fcntl.LOCK_UN) diff --git a/modules/labstore/files/block_sync.sh b/modules/labstore/files/block_sync.sh deleted file mode 100644 index 17fb234..0000000 --- a/modules/labstore/files/block_sync.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -function usage { - echo -e "Usage:\n" - echo -e "This script replicates an LVM2 block device across the network by taking a remote snapshot\n" - echo -e "block_sync.sh remote_host remote_volume_group remote_logical_volume snapshot_name local_device\n" - echo -e "Example: block_sync.sh 10.64.37.20 misc test snaptest /dev/backup/test\n" -} - -if [[ "$#" -ne 5 || "$1" == '-h' ]]; then - usage - exit 1 -fi - -BDSYNC='/usr/bin/bdsync' -SNAPSHOT_MGR='/usr/local/sbin/snapshot-manager' -PV_OPTIONS='-p -t -e -r -a -b' -r_user='root' -r_host=$1 -r_vg=$2 -r_lv=$3 -r_snapshot_name=$4 -remotenice=10 - -localdev=$5 -blocksize=16384 - -remote_connect="ssh -i /root/.ssh/id_labstore ${r_user}@${r_host}" - -/bin/findmnt --notruncate -P -n -c $localdev -if [ $? -eq 0 ] -then - echo "Local device is mounted. Operations may be unsafe." - exit 1 -fi - -set -e - -( - /usr/bin/flock --nonblock --exclusive 200 - - $remote_connect "/usr/bin/test -e ${BDSYNC}" - $remote_connect "/usr/bin/test -e ${SNAPSHOT_MGR}" - - $remote_connect "${SNAPSHOT_MGR} create ${r_snapshot_name} ${r_vg}/${r_lv} --force" - - $BDSYNC --blocksize=$blocksize \ - --remdata "${remote_connect} 'nice -${remotenice} ${BDSYNC} --server'" \ - $localdev "/dev/${r_vg}/${r_snapshot_name}" | \ - pv $PV_OPTIONS | \ - sudo $BDSYNC --patch=$localdev - -) 200>/var/lock/${r_vg}_${r_lv}_backup.lock diff --git a/modules/labstore/manifests/bdsync.pp b/modules/labstore/manifests/bdsync.pp index 21ac945..220c389 100644 --- a/modules/labstore/manifests/bdsync.pp +++ b/modules/labstore/manifests/bdsync.pp @@ -1,12 +1,12 @@ class labstore::bdsync { - package { 'bdsync': + package { ['python3', 'python3-dateutil', 'bdsync']: ensure => present, before => File['/usr/local/sbin/block_sync'], } file { '/usr/local/sbin/block_sync': - source => 'puppet:///modules/labstore/block_sync.sh', + source => 'puppet:///modules/labstore/block_sync.py', owner => 'root', group => 'root', mode => '0755', diff --git a/modules/labstore/manifests/device_backup.pp b/modules/labstore/manifests/device_backup.pp index 0eea4b1..c02aa90 100644 --- a/modules/labstore/manifests/device_backup.pp +++ b/modules/labstore/manifests/device_backup.pp @@ -3,7 +3,10 @@ $remote_vg, $remote_lv, $remote_snapshot, - $localdev, + $local_vg, + $local_lv, + $local_snapshot, + $local_snapshot_size, $weekday, $hour=0, $minute=0, @@ -23,13 +26,22 @@ } $block_sync='/usr/local/sbin/block_sync' - cron { "block_sync-${remote_vg}/${remote_lv}=>${localdev}": + cron { "block_sync-${remote_vg}/${remote_lv}=>${local_vg}/${local_lv}": ensure => 'present', user => 'root', - command => "${block_sync} ${remote_ip} ${remote_vg} ${remote_lv} ${remote_snapshot} ${localdev}", + command => "${block_sync} ${remote_ip} ${remote_vg} ${remote_lv} ${remote_snapshot} ${local_vg} ${local_lv} ${local_snapshot} ${local_snapshot_size}", weekday => $day[$weekday], hour => $hour, minute => $minute, environment => 'MAILTO=labs-ad...@lists.wikimedia.org', + require => File['/usr/local/sbin/snapshot-manager'], + } + + file { '/usr/local/sbin/snapshot-manager': + ensure => present, + owner => 'root', + group => 'root', + mode => '0755', + source => 'puppet:///modules/labstore/snapshot-manager.py', } } diff --git a/modules/role/manifests/labs/nfs/secondary_backup/misc.pp b/modules/role/manifests/labs/nfs/secondary_backup/misc.pp index 617ebd9..a9e312f 100644 --- a/modules/role/manifests/labs/nfs/secondary_backup/misc.pp +++ b/modules/role/manifests/labs/nfs/secondary_backup/misc.pp @@ -8,13 +8,16 @@ } labstore::device_backup { 'secondary-misc': - remotehost => 'labstore1004.eqiad.wmnet', - remote_vg => 'misc', - remote_lv => 'misc-project', - remote_snapshot => 'misc-snap', - localdev => '/dev/backup/misc-project', - weekday => 'wednesday', - hour => 20, + remotehost => 'labstore1004.eqiad.wmnet', + remote_vg => 'misc', + remote_lv => 'misc-project', + remote_snapshot => 'misc-snap', + local_vg => 'backup', + local_lv => 'misc-project', + local_snapshot => 'misc-project-backup', + local_snapshot_size => '2T', + weekday => 'wednesday', + hour => 20, } } diff --git a/modules/role/manifests/labs/nfs/secondary_backup/tools.pp b/modules/role/manifests/labs/nfs/secondary_backup/tools.pp index fe0020f..adf4fb9 100644 --- a/modules/role/manifests/labs/nfs/secondary_backup/tools.pp +++ b/modules/role/manifests/labs/nfs/secondary_backup/tools.pp @@ -8,13 +8,16 @@ } labstore::device_backup { 'secondary-tools': - remotehost => 'labstore1004.eqiad.wmnet', - remote_vg => 'tools', - remote_lv => 'tools-project', - remote_snapshot => 'tools-snap', - localdev => '/dev/backup/tools-project', - weekday => 'tuesday', - hour => 20, + remotehost => 'labstore1004.eqiad.wmnet', + remote_vg => 'tools', + remote_lv => 'tools-project', + remote_snapshot => 'tools-snap', + local_vg => 'backup', + local_lv => 'tools-project', + local_snapshot => 'tools-project-backup', + local_snapshot_size => '2T', + weekday => 'tuesday', + hour => 20, } } -- To view, visit https://gerrit.wikimedia.org/r/334692 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ic485bf5bce49581a71457f9dcf9802367e3c1c6e Gerrit-PatchSet: 12 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Madhuvishy <mviswanat...@wikimedia.org> Gerrit-Reviewer: Madhuvishy <mviswanat...@wikimedia.org> Gerrit-Reviewer: Rush <r...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits