ArielGlenn has uploaded a new change for review. https://gerrit.wikimedia.org/r/269933
Change subject: dumps mirroring tool, change remote and local to source and dest ...................................................................... dumps mirroring tool, change remote and local to source and dest There should be no concept of 'local' files for rsync. Change-Id: Idccca8e7b13c08039353cd57d287cf8e4c01d452 --- M tools/mirrors/wmfdumpsmirror.py 1 file changed, 58 insertions(+), 58 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/33/269933/1 diff --git a/tools/mirrors/wmfdumpsmirror.py b/tools/mirrors/wmfdumpsmirror.py index 0121937..0f10598 100644 --- a/tools/mirrors/wmfdumpsmirror.py +++ b/tools/mirrors/wmfdumpsmirror.py @@ -129,7 +129,7 @@ # not to distant future it will be maybe a stream cause we'll be # feeding a list from the api, that will be sketchy def __init__(self, file_list_fd, max_files_per_job, max_du_per_job, - worker_count, rsync_remote_path, local_path, rsync_args, + worker_count, rsync_source_path, dest_path, rsync_args, verbose, dryrun): self.file_list_fd = file_list_fd self.max_files_per_job = max_files_per_job @@ -137,14 +137,14 @@ self.verbose = verbose self.dryrun = dryrun self.rsync_args = rsync_args - self.local_path = local_path - self.rsyncer = Rsyncer(rsync_remote_path, local_path, + self.dest_path = dest_path + self.rsyncer = Rsyncer(rsync_source_path, dest_path, self.rsync_args, self.verbose, self.dryrun) self.jqueue = JobQueue(worker_count, self.rsyncer, self.verbose, self.dryrun) self.date_pattern = re.compile('^20[0-9]{6}$') self.jobs_per_project = {} self.jobs = {} - self.deleter = DirDeleter(self.jobs_per_project, self.local_path, + self.deleter = DirDeleter(self.jobs_per_project, self.dest_path, self.verbose, self.dryrun) def stuff_jobs_on_queue(self): @@ -225,17 +225,17 @@ class DirDeleter(object): """remove all dirs for the project that are not in the list of dirs to rsync, we don't want them any more""" - def __init__(self, jobs_per_project, local_path, verbose, dryrun): + def 
__init__(self, jobs_per_project, dest_path, verbose, dryrun): self.jobs_per_project = jobs_per_project - self.local_path = local_path + self.dest_path = dest_path self.verbose = verbose self.dryrun = dryrun self.job_list = None - def get_full_local_path(self, rel_path): + def get_full_dest_path(self, rel_path): if rel_path.startswith(os.sep): rel_path = rel_path[len(os.sep):] - return os.path.join(self.local_path, rel_path) + return os.path.join(self.dest_path, rel_path) def set_job_list(self, job_list): self.job_list = job_list @@ -288,9 +288,9 @@ # remove the ones we didn't as we no longer want them project_dirs_rsynced = self.list_dirs_rsynced_for_proj(project) - if not os.path.exists(self.get_full_local_path(project)): + if not os.path.exists(self.get_full_dest_path(project)): return - dirs = os.listdir(self.get_full_local_path(project)) + dirs = os.listdir(self.get_full_dest_path(project)) if self.dryrun or self.verbose: MirrorMsg.display("for project %s:" % project) @@ -310,10 +310,10 @@ MirrorMsg.display("'%s'" % dir_name, True) if not self.dryrun: try: - shutil.rmtree(self.get_full_local_path(dir_name)) + shutil.rmtree(self.get_full_dest_path(dir_name)) except: MirrorMsg.warn("failed to remove directory or contents of %s\n" - % self.get_full_local_path(dir_name)) + % self.get_full_dest_path(dir_name)) pass if self.dryrun or self.verbose: MirrorMsg.display('\n', True) @@ -329,7 +329,7 @@ for dirname in dirs: if dirname in project_dirs_rsynced: - files_existing = os.listdir(self.get_full_local_path( + files_existing = os.listdir(self.get_full_dest_path( os.path.join(project, dirname))) files_rsynced = self.list_files_rsynced_for_proj_dir(project, dirname) files_to_toss = [f for f in files_existing if not f in files_rsynced] @@ -339,13 +339,13 @@ if not len(files_to_toss): MirrorMsg.display("None", True) for tossme in files_to_toss: - file_name = self.get_full_local_path( + file_name = self.get_full_dest_path( os.path.join(project, dirname, tossme)) if 
os.path.isdir(file_name): continue if self.dryrun or self.verbose: # we should never be pushing directories across as part of the rsync. - # so if we have a local directory, leave it alone + # so if we have a dest directory, leave it alone MirrorMsg.display("'%s'" % tossme, True) if not self.dryrun: try: @@ -373,10 +373,10 @@ class Rsyncer(JobHandler): """all the info about rsync you ever wanted to know but were afraid to ask...""" - def __init__(self, rsync_remote_path, local_path, + def __init__(self, rsync_source_path, dest_path, rsync_args, verbose, dryrun): - self.rsync_remote_path = rsync_remote_path - self.local_path = local_path + self.rsync_source_path = rsync_source_path + self.dest_path = dest_path self.rsync_args = rsync_args self.verbose = verbose self.dryrun = dryrun @@ -389,7 +389,7 @@ command = ["/usr/bin/rsync"] command.extend(["--files-from", "-"]) command.extend(self.rsync_args) - command.extend([self.rsync_remote_path, self.local_path]) + command.extend([self.rsync_source_path, self.dest_path]) if self.dryrun or self.verbose: command_string = " ".join(command) @@ -576,20 +576,20 @@ class Mirror(object): """reading directories for rsync from a specified file, - rsync each one; remove directories locally that aren't in the file""" + rsync each one; remove directories at the destination that aren't in the file""" - def __init__(self, host_name, remote_dir_name, local_dir_name, + def __init__(self, host_name, source_dir_name, dest_dir_name, rsync_list, rsync_args, max_files_per_job, max_du_per_job, worker_count, skip_deletes, verbose, dryrun): self.host_name = host_name - self.remote_dir_name = remote_dir_name - self.local_dir_name = local_dir_name + self.source_dir_name = source_dir_name + self.dest_dir_name = dest_dir_name if self.host_name: - self.rsync_remote_root = self.host_name + "::" + self.remote_dir_name + self.rsync_source_root = self.host_name + "::" + self.source_dir_name else: - # the 'remote' dir is actually on the local host and we are - # 
rsyncing from one locally mounted filesystem to another - self.rsync_remote_root = self.remote_dir_name + # the 'source' dir is actually on the local host and we are + # rsyncing from one locally mounted filesystem to another + self.rsync_source_root = self.source_dir_name self.rsync_file_list = rsync_list self.rsync_args = rsync_args self.verbose = verbose @@ -600,16 +600,16 @@ self.skip_deletes = skip_deletes self.files_processor = None - def get_full_local_path(self, rel_path): + def get_full_dest_path(self, rel_path): if rel_path.startswith(os.sep): rel_path = rel_path[len(os.sep):] - return os.path.join(self.local_dir_name, rel_path) + return os.path.join(self.dest_dir_name, rel_path) def get_rsync_file_listing(self): - """via rsync, get full list of files for rsync from remote host""" + """via rsync, get full list of files for rsync from source host""" command = ["/usr/bin/rsync", "-tp", - self.rsync_remote_root + '/' + self.rsync_file_list, - self.local_dir_name] + self.rsync_source_root + '/' + self.rsync_file_list, + self.dest_dir_name] # here we don't do a dry run, we will actually retrieve # the list (because otherwise the rest of the run # won't produce any information about what the run @@ -621,15 +621,15 @@ raise MirrorError("_failed to get list of files for rsync\n") def process_rsync_file_list(self): - fdesc = open(self.get_full_local_path(self.rsync_file_list)) + fdesc = open(self.get_full_dest_path(self.rsync_file_list)) if not fdesc: raise MirrorError("failed to open list of files for rsync", - os.path.join(self.local_dir_name, + os.path.join(self.dest_dir_name, self.rsync_file_list)) self.files_processor = RsyncFilesProcessor( fdesc, self.max_files_per_job, self.max_du_per_job, - self.worker_count, self.rsync_remote_root, - self.local_dir_name, self.rsync_args, self.verbose, self.dryrun) + self.worker_count, self.rsync_source_root, + self.dest_dir_name, self.rsync_args, self.verbose, self.dryrun) # create all jobs and put on todo queue 
self.files_processor.stuff_jobs_on_queue() fdesc.close() @@ -656,8 +656,8 @@ if message: sys.stderr.write("%s\n" % message) usage_message = """ -Usage: python wmfdumpsmirror.py [--hostname dumpserver] -remotedir dirpath - --localdir dirpath [--rsyncargs args] [--rsynclist filename] +Usage: python wmfdumpsmirror.py [--hostname dumpserver] --sourcedir dirpath + --destdir dirpath [--rsyncargs args] [--rsynclist filename] [--filesperjob] [--sizeperjob] [--workercount] [--dryrun] [--skipdeletes] [--verbose] @@ -669,10 +669,10 @@ --hostname: the name of the dump rsync server to contact if this is left blank, the copy will be done from one path - to another on the local host ---remotedir: the remote path to the top of the dump directory tree + to another on the local host +--sourcedir: the source path to the top of the dump directory tree containing the mirror ---localdir: the full path to the top of the local directory tree +--destdir: the full path to the top of the dest directory tree containing the mirror --rsyncargs: arguments to be passed through to rsync, comma-separated, with 'arg=value' for arguments that require a value @@ -696,7 +696,7 @@ --verbose: print lots of diagnostic output Example: python wmfdumpsmirror.py --hostname dumps.wikimedia.org - --localdir /opt/data/dumps --rsyncfile rsync-list.txt.rsync + --destdir /opt/data/dumps --rsynclist rsync-list.txt.rsync """ sys.stderr.write(usage_message) sys.exit(1) @@ -732,8 +732,8 @@ def main(): host_name = None - local_dir = None - remote_dir = None + dest_dir = None + source_dir = None rsync_list = None rsync_args = None max_files_per_job = None @@ -745,7 +745,7 @@ try: (options, remainder) = getopt.gnu_getopt( - sys.argv[1:], "", ["hostname=", "localdir=", "remotedir=", + sys.argv[1:], "", ["hostname=", "destdir=", "sourcedir=", "rsynclist=", "rsyncargs=", "filesperjob=", "sizeperjob=", "workercount=", "dryrun", "skipdeletes", "verbose"]) @@ -761,10 +761,10 @@ max_files_per_job = int(val) elif opt == 
"--hostname": host_name = val - elif opt == "--localdir": - local_dir = val - elif opt == "--remotedir": - remote_dir = val + elif opt == "--destdir": + dest_dir = val + elif opt == "--sourcedir": + source_dir = val elif opt == "--rsynclist": rsync_list = val elif opt == "--rsyncargs": @@ -783,12 +783,12 @@ if len(remainder) > 0: usage("Unknown option specified") - if not remote_dir or not local_dir: + if not source_dir or not dest_dir: usage("Missing required option") - if not os.path.isdir(local_dir): - usage("local rsync directory %s" - " does not exist or is not a directory" % local_dir) + if not os.path.isdir(dest_dir): + usage("dest rsync directory %s" + " does not exist or is not a directory" % dest_dir) if not rsync_list: rsync_list = "rsync-list.txt.rsync" @@ -805,13 +805,13 @@ if not rsync_args: rsync_args = ["-aq"] - if remote_dir[-1] == '/': - remote_dir = remote_dir[:-1] + if source_dir[-1] == '/': + source_dir = source_dir[:-1] - if local_dir[-1] == '/': - local_dir = local_dir[:-1] + if dest_dir[-1] == '/': + dest_dir = dest_dir[:-1] - mirror = Mirror(host_name, remote_dir, local_dir, + mirror = Mirror(host_name, source_dir, dest_dir, rsync_list, rsync_args, max_files_per_job, max_du_per_job, worker_count, skip_deletes, verbose, dryrun) -- To view, visit https://gerrit.wikimedia.org/r/269933 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Idccca8e7b13c08039353cd57d287cf8e4c01d452 Gerrit-PatchSet: 1 Gerrit-Project: operations/dumps Gerrit-Branch: ariel Gerrit-Owner: ArielGlenn <ar...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits