Rafidaslam has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/401191 )

Change subject: download_dump: Handle cases when the dump file already exists
......................................................................

download_dump: Handle cases when the dump file already exists

Bug: T183667
Change-Id: Id205bd4f03393c8c59be918449dfd47366115f00
---
M scripts/maintenance/download_dump.py
1 file changed, 57 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/91/401191/1

diff --git a/scripts/maintenance/download_dump.py 
b/scripts/maintenance/download_dump.py
index 853d33f..6d906d5 100644
--- a/scripts/maintenance/download_dump.py
+++ b/scripts/maintenance/download_dump.py
@@ -26,6 +26,8 @@
 import os.path
 import sys
 
+from datetime import datetime
+from glob import glob
 from os import remove, symlink, urandom
 
 try:
@@ -90,11 +92,64 @@
         temp_filename = download_filename + '-' + \
             binascii.b2a_hex(urandom(8)).decode('ascii') + '.part'
 
-        file_final_storepath = os.path.join(
-            self.getOption('storepath'), download_filename)
+        if self.getOption('revision') == 'latest':
+            date_str = datetime.now().strftime('%Y%m%d')
+
+            # Make a new filename with the current date placed before the
+            # extension,
+            # for example 'idwiki-latest-abstract.xml-rss.xml'
+            # to 'idwiki-latest-abstract.xml-rss.20180101.xml'
+            # (just for the `latest` revision).
+            new_filename = download_filename.split('.')
+            new_filename.insert(-1, date_str)
+            new_filename = '.'.join(new_filename)
+
+            file_final_storepath = os.path.join(
+                self.getOption('storepath'), new_filename)
+        else:
+            file_final_storepath = os.path.join(
+                self.getOption('storepath'), download_filename)
+
         file_current_storepath = os.path.join(
             self.getOption('storepath'), temp_filename)
 
+        # Check if the file already exists locally
+        if os.path.exists(file_final_storepath):
+            pywikibot.output('File with path {path} already exists and '
+                             'will not be downloaded again.'.format(
+                                 path=file_final_storepath
+                             ))
+            return
+
+        # Warn the user if the previous `latest` revision with the same
+        # name already exists.
+        if self.getOption('revision') == 'latest':
+            # Glob pattern matching the same file with a different date.
+            filepath_glob_pattern = file_final_storepath.split('.')
+            filepath_glob_pattern[-2] = (
+                '[0-9][0-9][0-9][0-9][0-1][0-9][0-3][0-9]')
+            filepath_glob_pattern = '.'.join(filepath_glob_pattern)
+
+            similar_filepaths = glob(filepath_glob_pattern)
+
+            # Search for file with the newest date
+            newest_file = [datetime(1, 1, 1), '']  # [date, filename]
+            for filepath in similar_filepaths:
+                file_date = datetime.strptime(
+                    filepath.split('.')[-2], '%Y%m%d')
+                if file_date > newest_file[0]:
+                    newest_file = [file_date, filepath]
+
+            pywikibot.output('Warning, you\'re about to download a file that '
+                             'is already exist before from the `latest`'
+                             ' revision. The newest file downloaded for this '
+                             'filename is located at {filepath} which was '
+                             'downloaded at {date}. The file content might '
+                             'be the same with the file that will be '
+                             'downloaded'.format(
+                                 filepath=newest_file[1],
+                                 date=newest_file[0].strftime('%Y-%m-%d')))
+
         # https://wikitech.wikimedia.org/wiki/Help:Toolforge#Dumps
         toolforge_dump_filepath = self.get_dump_name(
             self.getOption('wikiname'), self.getOption('filename'))

-- 
To view, visit https://gerrit.wikimedia.org/r/401191
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id205bd4f03393c8c59be918449dfd47366115f00
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Rafidaslam <rafidt...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to