[MediaWiki-commits] [Gerrit] operations/puppet[production]: nfs: Snapshot backup device on secondary DC before replicating latest from remote
Madhuvishy has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/334692 ) Change subject: nfs: Snapshot backup device on secondary DC before replicating latest from remote .. nfs: Snapshot backup device on secondary DC before replicating latest from remote Bug: T149870 Change-Id: Ic485bf5bce49581a71457f9dcf9802367e3c1c6e --- A modules/labstore/files/block_sync.py D modules/labstore/files/block_sync.sh M modules/labstore/manifests/bdsync.pp M modules/labstore/manifests/device_backup.pp M modules/role/manifests/labs/nfs/secondary_backup/misc.pp M modules/role/manifests/labs/nfs/secondary_backup/tools.pp 6 files changed, 178 insertions(+), 72 deletions(-) Approvals: Madhuvishy: Looks good to me, approved Rush: Looks good to me, but someone else must approve jenkins-bot: Verified diff --git a/modules/labstore/files/block_sync.py b/modules/labstore/files/block_sync.py new file mode 100644 index 000..9881a3c --- /dev/null +++ b/modules/labstore/files/block_sync.py @@ -0,0 +1,141 @@ +#!/usr/bin/python3 + +import argparse +import fcntl +import os +import shlex +import subprocess +import sys + +BDSYNC = '/usr/bin/bdsync' +SNAPSHOT_MGR = '/usr/local/sbin/snapshot-manager' + + +def run_remote(cmd, r_host, r_user): +""" Run command on remote host over ssh +:param cmd: Command to run +:param r_host: Remote host to connect to +:param r_user: Remote user to run command as +:return returncode on success +:raise CalledProcessError +""" +remote_cmd = '/usr/bin/ssh -i /root/.ssh/id_labstore {}@{} "{}"'.format( +r_user, r_host, cmd) +return subprocess.check_call(shlex.split(remote_cmd)) + + +def run_local(cmd): +""" Run command locally +:param cmd: Command to run_local +:return returncode on success +:raise CalledProcessError +""" +return subprocess.check_call(shlex.split(cmd)) + + +def bdsync(local_device, r_host, r_vg, r_snapshot_name, r_user): +""" Run the block device sync from remote to local device using bdsync +:param local_device Local device to sync to 
+:param r_host Remote host to sync from +:param r_vg Remote volume group +:param r_snapshot_name Name of remote snapshot to sync from +:param r_user Username to run remote commands as +:return String (stdout_data) +""" +remotenice = 10 +blocksize = 16384 +server = '/usr/bin/nice -{} {} --server'.format(remotenice, BDSYNC) +remdata = '/usr/bin/ssh -i /root/.ssh/id_labstore {}@{} "{}"'.format(r_user, r_host, server) +sync_cmd = '{} --blocksize={} --remdata \'{}\' {} /dev/{}/{}' \ +.format(BDSYNC, blocksize, remdata, local_device, r_vg, r_snapshot_name) +progress_cmd = '/usr/bin/pv -p -t -e -r -a -b' +patch_cmd = '{} --patch={}'.format(BDSYNC, local_device) +sync = subprocess.Popen(shlex.split(sync_cmd), stdout=subprocess.PIPE) +progress = subprocess.Popen(shlex.split(progress_cmd), +stdin=sync.stdout, +stdout=subprocess.PIPE, +universal_newlines=True) +patch = subprocess.Popen(shlex.split(patch_cmd), stdin=progress.stdout) +patch.communicate()[0] + +if __name__ == '__main__': + +if os.geteuid() != 0: +print("Script needs to be run as root") +sys.exit(1) + +argparser = argparse.ArgumentParser() + +argparser.add_argument( +'r_host', +help='Remote host, e.g. 10.64.37.20 or labstore1004.eqiad.wmnet' +) +argparser.add_argument( +'r_vg', +help='Remote volume group, e.g. misc' +) +argparser.add_argument( +'r_lv', +help='Remote logical volume, e.g. test' +) +argparser.add_argument( +'r_snapshot_name', +help='Remote snapshot name, e.g. testsnap' +) +argparser.add_argument( +'l_vg', +help='Volume group of local device, e.g backup' +) +argparser.add_argument( +'l_lv', +help='Logical volume of local device, e.g test' +) +argparser.add_argument( +'l_snapshot_name', +help='Local snapshot name, e.g test-backup' +) +argparser.add_argument( +'l_snapshot_size', +help='Local snapshot size matching lvcreate expectations e.g. 
[1T|10G|100m]', +default='1T', +) +argparser.add_argument( +'--r_user', +help='Remote user to run commands over ssh as', +default='root', +) +args = argparser.parse_args() + +local_device = '/dev/{}/{}'.format(args.l_vg, args.l_lv) + +lock_file = open('/var/lock/{}_{}_backup.lock'.format(args.r_vg, args.r_lv, 'w+')) +fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB) + +try: +try: +run_local('/bin/findmnt --notruncate -P -n -c {}'.format(local_device)) +print('Local device is mounted. Operations may be
[MediaWiki-commits] [Gerrit] operations/puppet[production]: nfs: Snapshot backup device on secondary DC before replicating latest from remote
Madhuvishy has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/334692 ) Change subject: nfs: Snapshot backup device on secondary DC before replicating latest from remote .. nfs: Snapshot backup device on secondary DC before replicating latest from remote Change-Id: Ic485bf5bce49581a71457f9dcf9802367e3c1c6e --- M modules/labstore/files/block_sync.sh M modules/labstore/manifests/device_backup.pp M modules/role/manifests/labs/nfs/secondary_backup/base.pp M modules/role/manifests/labs/nfs/secondary_backup/misc.pp M modules/role/manifests/labs/nfs/secondary_backup/tools.pp 5 files changed, 50 insertions(+), 20 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/92/334692/1 diff --git a/modules/labstore/files/block_sync.sh b/modules/labstore/files/block_sync.sh index 17fb234..9c556bc 100644 --- a/modules/labstore/files/block_sync.sh +++ b/modules/labstore/files/block_sync.sh @@ -3,8 +3,9 @@ function usage { echo -e "Usage:\n" echo -e "This script replicates an LVM2 block device across the network by taking a remote snapshot\n" -echo -e "block_sync.sh remote_host remote_volume_group remote_logical_volume snapshot_name local_device\n" -echo -e "Example: block_sync.sh 10.64.37.20 misc test snaptest /dev/backup/test\n" +echo -e "It also saves a snapshot of the local device before replicating from the remote snapshot\n" +echo -e "block_sync.sh remote_host remote_volume_group remote_logical_volume snapshot_name local_volume_group local_logical_volume local_snapshot_name local_snapshot_size\n" +echo -e "Example: block_sync.sh 10.64.37.20 misc test snaptest backup test misc-backup 1T\n" } if [[ "$#" -ne 5 || "$1" == '-h' ]]; then @@ -22,7 +23,12 @@ r_snapshot_name=$4 remotenice=10 -localdev=$5 +l_vg=$5 +l_lv=$6 +l_snapshot_name=$7 +l_snapshot_size=$8 +localdev="/dev/${l_vg}/${l_lv}" + blocksize=16384 remote_connect="ssh -i /root/.ssh/id_labstore ${r_user}@${r_host}" @@ -44,6 +50,8 @@ $remote_connect "${SNAPSHOT_MGR} create 
${r_snapshot_name} ${r_vg}/${r_lv} --force" +${SNAPSHOT_MGR} create --size ${l_snapshot_size} ${l_snapshot_name} ${l_vg}/${l_lv} --force + $BDSYNC --blocksize=$blocksize \ --remdata "${remote_connect} 'nice -${remotenice} ${BDSYNC} --server'" \ $localdev "/dev/${r_vg}/${r_snapshot_name}" | \ diff --git a/modules/labstore/manifests/device_backup.pp b/modules/labstore/manifests/device_backup.pp index cef0a08..026dce4 100644 --- a/modules/labstore/manifests/device_backup.pp +++ b/modules/labstore/manifests/device_backup.pp @@ -3,7 +3,10 @@ $remote_vg, $remote_lv, $remote_snapshot, -$localdev, +$local_lv, +$local_vg, +$local_snapshot, +$local_snapshot_size, $weekday, $hour=0, $minute=0, @@ -26,10 +29,19 @@ cron { "block_sync-${remote_vg}/${remote_lv}=>${localdev}": ensure => 'present', user=> 'root', -command => "${block_sync} ${remote_ip} ${remote_vg} ${remote_lv} ${remote_snapshot} ${localdev}", +command => "${block_sync} ${remote_ip} ${remote_vg} ${remote_lv} ${remote_snapshot} ${local_vg} ${local_lv} ${local_snapshot} ${local_snapshot_size}", weekday => $day[$weekday], hour=> $hour, minute => $minute, -environment => 'MAILTO=labs-ad...@lists.wikimedia.org' +environment => 'MAILTO=labs-ad...@lists.wikimedia.org', +require => File['/usr/local/sbin/snapshot-manager'] +} + +file { '/usr/local/sbin/snapshot-manager': +ensure => present, +owner => 'root', +group => 'root', +mode => '0755', +source => 'puppet:///modules/labstore/snapshot-manager.py', } } diff --git a/modules/role/manifests/labs/nfs/secondary_backup/base.pp b/modules/role/manifests/labs/nfs/secondary_backup/base.pp index fd2a403..a1a54cd 100644 --- a/modules/role/manifests/labs/nfs/secondary_backup/base.pp +++ b/modules/role/manifests/labs/nfs/secondary_backup/base.pp @@ -6,6 +6,10 @@ include labstore::backup_keys +package { ['python3', 'python3-dateutil']: +ensure => present, +} + file {'/srv/backup': ensure => 'directory', } diff --git a/modules/role/manifests/labs/nfs/secondary_backup/misc.pp 
b/modules/role/manifests/labs/nfs/secondary_backup/misc.pp index 617ebd9..a3afb3c 100644 --- a/modules/role/manifests/labs/nfs/secondary_backup/misc.pp +++ b/modules/role/manifests/labs/nfs/secondary_backup/misc.pp @@ -8,13 +8,16 @@ } labstore::device_backup { 'secondary-misc': -remotehost => 'labstore1004.eqiad.wmnet', -remote_vg => 'misc', -remote_lv => 'misc-project', -remote_snapshot => 'misc-snap', -localdev=>