Rafidaslam has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/401191 )
Change subject: download_dump: Handle cases when the dump file already exists ...................................................................... download_dump: Handle cases when the dump file already exists Bug: T183667 Change-Id: Id205bd4f03393c8c59be918449dfd47366115f00 --- M scripts/maintenance/download_dump.py 1 file changed, 57 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/91/401191/1 diff --git a/scripts/maintenance/download_dump.py b/scripts/maintenance/download_dump.py index 853d33f..6d906d5 100644 --- a/scripts/maintenance/download_dump.py +++ b/scripts/maintenance/download_dump.py @@ -26,6 +26,8 @@ import os.path import sys +from datetime import datetime +from glob import glob from os import remove, symlink, urandom try: @@ -90,11 +92,64 @@ temp_filename = download_filename + '-' + \ binascii.b2a_hex(urandom(8)).decode('ascii') + '.part' - file_final_storepath = os.path.join( - self.getOption('storepath'), download_filename) + if self.getOption('revision') == 'latest': + date_str = datetime.now().strftime('%Y%m%d') + + # Make a new filename with the current date placed before the + # extension, + # for example 'idwiki-latest-abstract.xml-rss.xml' + # to 'idwiki-latest-abstract.xml-rss.20180101.xml' + # (just for the `latest` revision). + new_filename = download_filename.split('.') + new_filename.insert(-1, date_str) + new_filename = '.'.join(new_filename) + + file_final_storepath = os.path.join( + self.getOption('storepath'), new_filename) + else: + file_final_storepath = os.path.join( + self.getOption('storepath'), download_filename) + file_current_storepath = os.path.join( self.getOption('storepath'), temp_filename) + # Check if the file already exists in local + if os.path.exists(file_final_storepath): + pywikibot.output('File with path {path} already exists and ' + 'will not be downloaded again.'.format( + path=file_final_storepath + )) + return + + # Warn the user if the previous `latest` revision with the same + # name already exists. + if self.getOption('revision') == 'latest': + # The pattern to detect the same file, but different name. + filepath_glob_pattern = file_final_storepath.split('.') + filepath_glob_pattern[-2] = ( + '[0-9][0-9][0-9][0-9][0-1][0-9][0-3][0-9]') + filepath_glob_pattern = '.'.join(filepath_glob_pattern) + + similar_filepaths = glob(filepath_glob_pattern) + + # Search for file with the newest date + newest_file = [datetime(1, 1, 1), ''] # [date, filename] + for filepath in similar_filepaths: + file_date = datetime.strptime( + filepath.split('.')[-2], '%Y%m%d') + if file_date > newest_file[0]: + newest_file = [file_date, filepath] + + pywikibot.output('Warning, you\'re about to download a file that ' + 'is already exist before from the `latest`' + ' revision. The newest file downloaded for this ' + 'filename is located at {filepath} which was ' + 'downloaded at {date}. The file content might ' + 'be the same with the file that will be ' + 'downloaded'.format( + filepath=newest_file[1], + date=newest_file[0].strftime('%Y-%m-%d'))) + # https://wikitech.wikimedia.org/wiki/Help:Toolforge#Dumps toolforge_dump_filepath = self.get_dump_name( self.getOption('wikiname'), self.getOption('filename')) -- To view, visit https://gerrit.wikimedia.org/r/401191 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Id205bd4f03393c8c59be918449dfd47366115f00 Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: Rafidaslam <rafidt...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits