Prianka has uploaded a new change for review.
https://gerrit.wikimedia.org/r/181354
Change subject: Bug T66159
......................................................................
Bug T66159
Porting tag_nowcommons.py to core
Error received:NameError
Traceback (most recent call last):
File "pwb.py", line 221, in <module>
run_python_file(filename, argv, argvu, file_package)
File "pwb.py", line 80, in run_python_file
exec(compile(source, filename, "exec"), main_mod.__dict__)
File "./scripts/nowcommonmergefile.py", line 580, in <module>
main()
File "./scripts/nowcommonmergefile.py", line 544, in main
genFactory = pagegenerators.GeneratorFactory()
NameError: global name 'pagegenerators' is not defined
<type 'exceptions.NameError'>
CRITICAL: Waiting for 1 network thread(s) to finish. Press ctrl-c to abort
Change-Id: Ie14931b782afff92c079ec92d5808bf28a368db1
---
A scripts/nowcommonmergefile.py
1 file changed, 580 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/54/181354/1
diff --git a/scripts/nowcommonmergefile.py b/scripts/nowcommonmergefile.py
new file mode 100644
index 0000000..b46327d
--- /dev/null
+++ b/scripts/nowcommonmergefile.py
@@ -0,0 +1,580 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Script to delete files that are also present on Wikimedia Commons.
+
+Do not run this script on Wikimedia Commons itself. It works based on
+a given array of templates defined below.
+
+Files are downloaded and compared. If the files match, the local file can be
+deleted on the source wiki. If multiple versions of the file exist, or if the
+SHA1 checksums differ, the script will not delete the file.
+
+A sysop account on the local wiki is required if you want all features of
+this script to work properly.
+
+This script understands various command-line arguments:
+ -always run automatically, do not ask any questions. All files
+ that qualify for deletion are deleted. Reduced screen
+ output.
+
+ -replace replace links if the files are equal and the file names
+ differ
+
+ -replacealways replace links if the files are equal and the file names
+ differ without asking for confirmation
+
+ -replaceloose Do loose replacements. This will replace all occurrences
+ of the name of the image (and not just explicit image
+ syntax). This should work to catch all instances of the
+ file, including where it is used as a template parameter
+ or in galleries. However, it can also make more
+ mistakes.
+
+ -replaceonly Use this if you do not have a local sysop account, but do
+ wish to replace links from the NowCommons template.
+
+ -hash Use the hash to identify the images that are the same. It
+ doesn't always work, so the bot opens two tabs to let
+ the user check if the images are equal or not.
+
+-- Example --
+python nowcommons.py -replaceonly -hash -replace -replaceloose -replacealways
+
+-- Known issues --
+Please fix these if you are capable and motivated:
+- if a file marked nowcommons is not present on Wikimedia Commons, the bot
+ will exit.
+"""
+#
+# (C) Wikipedian, 2006-2007
+# (C) Siebrand Mazeland, 2007-2008
+# (C) xqt, 2010-2014
+# (C) Pywikibot team, 2006-2014
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+#
+
+import sys
+import re
+import webbrowser
+
+import pywikibot
+from pywikibot import i18n, Bot
+from pywikibot import pagegenerators as pg
+import image
+from imagetransfer import nowCommonsMessage
+
+# Names of the local "NowCommons" templates, keyed by language code.
+# NowCommonsDeleteBot.ncTemplates() looks up the site's language here and
+# falls back to the '_default' entry when the language has no list of its own.
+nowCommons = {
+ '_default': [
+ u'NowCommons'
+ ],
+ 'ar': [
+ u'الآن كومنز',
+ u'الآن كومونز',
+ ],
+ 'de': [
+ u'NowCommons',
+ u'NC',
+ u'NCT',
+ u'Nowcommons',
+ u'NowCommons/Mängel',
+ u'NowCommons-Überprüft',
+ ],
+ 'en': [
+ u'NowCommons',
+ u'Ncd',
+ ],
+ 'eo': [
+ u'Nun en komunejo',
+ u'NowCommons',
+ ],
+ 'fa': [
+ u'موجود در انبار',
+ u'NowCommons',
+ ],
+ 'fr': [
+ u'Image sur Commons',
+ u'DoublonCommons',
+ u'Déjà sur Commons',
+ u'Maintenant sur commons',
+ u'Désormais sur Commons',
+ u'NC',
+ u'NowCommons',
+ u'Nowcommons',
+ u'Sharedupload',
+ u'Sur Commons',
+ u'Sur Commons2',
+ ],
+ 'he': [
+ u'גם בוויקישיתוף'
+ ],
+ 'hu': [
+ u'Azonnali-commons',
+ u'NowCommons',
+ u'Nowcommons',
+ u'NC'
+ ],
+ 'ia': [
+ u'OraInCommons'
+ ],
+ 'it': [
+ u'NowCommons',
+ ],
+ 'ja': [
+ u'NowCommons',
+ ],
+ 'ko': [
+ u'NowCommons',
+ u'공용중복',
+ u'공용 중복',
+ u'Nowcommons',
+ ],
+ 'nds-nl': [
+ u'NoenCommons',
+ u'NowCommons',
+ ],
+ 'nl': [
+ u'NuCommons',
+ u'Nucommons',
+ u'NowCommons',
+ u'Nowcommons',
+ u'NCT',
+ u'Nct',
+ ],
+ 'ro': [
+ u'NowCommons'
+ ],
+ 'ru': [
+ u'NowCommons',
+ u'NCT',
+ u'Nowcommons',
+ u'Now Commons',
+ u'Db-commons',
+ u'Перенесено на Викисклад',
+ u'На Викискладе',
+ ],
+ 'zh': [
+ u'NowCommons',
+ u'Nowcommons',
+ u'NCT',
+ ],
+}
+
+# Language codes for which findFilenameOnCommons() takes the Commons file
+# name from the text after the first ':' of the template parameter
+# (presumably because the parameter carries a namespace prefix on those
+# wikis -- confirm against the local templates).
+namespaceInTemplate = [
+ 'en',
+ 'ia',
+ 'it',
+ 'ja',
+ 'ko',
+ 'lt',
+ 'ro',
+ 'zh',
+]
+
+# Words that mark images which must not be deleted, keyed by language code
+# (e.g. "stemma" and "stub" images, of which it.wikipedia has a lot).
+# If your project has images like that, add the words commonly used in their
+# titles here: the hash generator skips every local image whose title
+# contains one of them (compared case-insensitively).
+word_to_skip = {
+ 'en': [],
+ 'it': ['stemma', 'stub', 'hill40 '],
+}
+
+# Template names that make tagNowCommons() leave a file untouched because it
+# is reported as already tagged. Keyed by family name and then by language
+# code; the '_default' list is used when the site's family/language pair has
+# no entry of its own.
+skips = {
+ '_default': [u'NowCommons'],
+ 'wikipedia': {
+ 'en': [u'NowCommons',
+ u'CommonsNow',
+ u'Nowcommons',
+ u'NowCommonsThis',
+ u'Nowcommons2',
+ u'NCT',
+ u'Nowcommonsthis',
+ u'Moved to commons',
+ u'Now Commons',
+ u'Now at commons',
+ u'Db-nowcommons',
+ u'WikimediaCommons',
+ u'Now commons',
+ u'Do not move to Commons',
+ u'KeepLocal',
+ u'Keeplocal',
+ u'NoCommons',
+ u'Nocommons',
+ u'NotMovedToCommons',
+ u'Nmtc',
+ u'Not moved to Commons',
+ u'Notmovedtocommons',
+ ],
+ 'fy': [u'NowCommons',
+ u'Nowcommons',
+ ],
+ },
+}
+
+
+class NoEnoughData(pywikibot.Error):
+ """Error raised when the user has not specified all the data needed."""
+
+
+def tagNowCommons(page):
+
+ imagepage = pywikibot.ImagePage(page.site(), page.title())
+ site = page.site()
+ language = site.language()
+ family = site.family.name
+
+ if not imagepage.fileIsOnCommons():
+
+ if family in skips and language in skips[family]:
+ localskips = skips[family][language]
+ else:
+ localskips = skips['_default']
+
+ for template in imagepage.templates():
+ #FIXME: Move the templates list to a lib.
+ if template in localskips:
+ pywikibot.output(
+ u'The file %s is already tagged with NowCommons'
+ % imagepage.title())
+ return
+
+ imagehash = imagepage.getHash()
+ commons = pywikibot.getSite(u'commons', u'commons')
+ duplicates = commons.getFilesFromAnHash(imagehash)
+ if duplicates:
+ duplicate = duplicates.pop()
+ pywikibot.output(u'Found duplicate image at %s' % duplicate)
+ comment = i18n.twtranslate(imagepage.site(),
+ 'commons-file-now-available',
+ {'localfile': imagepage.title(
+ withNamespace=False),
+ 'commonsfile': duplicate})
+ template = pywikibot.translate(imagepage.site(),
nowCommonsTemplate)
+ newtext = imagepage.get() + template % (duplicate,)
+ pywikibot.showDiff(imagepage.get(), newtext)
+ try:
+ imagepage.put(newtext, comment)
+ except pywikibot.LockedPage:
+ return
+
+
+class NowCommonsDeleteBot(Bot):
+
+ """Bot to delete migrated files."""
+
+ def __init__(self, **kwargs):
+ self.availableOptions.update({
+ 'replace': False,
+ 'replacealways': False,
+ 'replaceloose': False,
+ 'replaceonly': False,
+ 'use_hash': False,
+ })
+ super(NowCommonsDeleteBot, self).__init__(**kwargs)
+
+ self.site = pywikibot.Site()
+ if repr(self.site) == 'commons:commons':
+ sys.exit('Do not run this bot on Commons!')
+
+ def ncTemplates(self):
+ if self.site.lang in nowCommons:
+ return nowCommons[self.site.lang]
+ else:
+ return nowCommons['_default']
+
+ def useHashGenerator(self):
+ #
https://toolserver.org/~multichill/nowcommons.php?language=it&page=2&filter=
+ lang = self.site.lang
+ num_page = 0
+ word_to_skip_translated = i18n.translate(self.site, word_to_skip)
+ images_processed = list()
+ while 1:
+ url = ('https://toolserver.org/~multichill/nowcommons.php?'
+ 'language=%s&page=%s&filter=') % (lang, num_page)
+ HTML_text = self.site.getUrl(url, no_hostname=True)
+ reg = r'<[Aa] href="(?P<urllocal>.*?)">(?P<imagelocal>.*?)\
+ </[Aa]> +?</td><td>\n\s*?'
+ reg += r'<[Aa]
href="(?P<urlcommons>http[s]?://commons.wikimedia.org/.*?)" \
+ >Image:(?P<imagecommons>.*?)</[Aa]> +?</td><td>'
+ regex = re.compile(reg, re.UNICODE)
+ found_something = False
+ change_page = True
+ for x in regex.finditer(HTML_text):
+ found_something = True
+ image_local = x.group('imagelocal')
+ image_commons = x.group('imagecommons')
+ if image_local in images_processed:
+ continue
+ change_page = False
+ images_processed.append(image_local)
+ # Skip images that have something in the title (useful for
+ # it.wiki)
+ image_to_skip = False
+ for word in word_to_skip_translated:
+ if word.lower() in image_local.lower():
+ image_to_skip = True
+ if image_to_skip:
+ continue
+ url_local = x.group('urllocal')
+ url_commons = x.group('urlcommons')
+ pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
+ % image_local)
+ pywikibot.output(u'Local: %s\nCommons: %s\n'
+ % (url_local, url_commons))
+ webbrowser.open(url_local, 0, 1)
+ webbrowser.open(url_commons, 0, 1)
+ if image_local.split('Image:')[1] == image_commons:
+ choice = pywikibot.input_yn(
+ u'The local and the commons images have the same name,\
+ ' 'continue?', default=False, automatic_quit=False)
+ else:
+ choice = pywikibot.input_yn(
+ u'Are the two images equal?',
+ default=False, automatic_quit=False)
+ if choice:
+ yield [image_local, image_commons]
+ else:
+ continue
+ # The page is dinamically updated, so we may don't need to change
+ # it
+ if change_page:
+ num_page += 1
+ # If no image found means that there aren't anymore, break.
+ if not found_something:
+ break
+
+ def getPageGenerator(self):
+ if self.getOption('use_hash'):
+ gen = self.useHashGenerator()
+ else:
+ nowCommonsTemplates = [pywikibot.Page(self.site, title,
+ ns=10)
+ for title in self.ncTemplates()]
+ gens = [pg.ReferringPageGenerator(t, followRedirects=True,
+ onlyTemplateInclusion=True)
+ for t in nowCommonsTemplates]
+ gen = pg.CombinedPageGenerator(gens)
+ gen = pg.NamespaceFilterPageGenerator(gen, [6])
+ gen = pg.DuplicateFilterPageGenerator(gen)
+ gen = pg.PreloadingGenerator(gen)
+ return gen
+
+ def findFilenameOnCommons(self, localImagePage):
+ filenameOnCommons = None
+ for templateName, params in localImagePage.templatesWithParams():
+ if templateName in self.ncTemplates():
+ if params == []:
+ filenameOnCommons =\
+ localImagePage.title(withNamespace=False)
+ elif self.site.lang in namespaceInTemplate:
+ skip = False
+ filenameOnCommons = None
+ for par in params:
+ val = par.split('=')
+ if len(val) == 1 and not skip:
+ filenameOnCommons = par[par.index(':') + 1:]
+ break
+ if val[0].strip() == '1':
+ filenameOnCommons =\
+ val[1].strip()[val[1].strip().index(':') + 1:]
+ break
+ skip = True
+ if not filenameOnCommons:
+ filenameOnCommons =\
+ localImagePage.title(withNamespace=False)
+ else:
+ val = params[0].split('=')
+ if len(val) == 1:
+ filenameOnCommons = params[0].strip()
+ else:
+ filenameOnCommons = val[1].strip()
+ return filenameOnCommons
+
+ def run(self):
+ commons = pywikibot.Site('commons', 'commons')
+ comment = i18n.translate(self.site, nowCommonsMessage, fallback=True)
+
+ for page in self.getPageGenerator():
+ if self.getOption('use_hash'):
+ # Page -> Has the namespace | commons image -> Not
+ images_list = page # 0 -> local image, 1 -> commons image
+ page = pywikibot.Page(self.site, images_list[0])
+ else:
+ # If use_hash is true, we have already print this before, no
+ # need
+ self.current_page = page
+ try:
+ localImagePage = pywikibot.FilePage(self.site, page.title())
+ if localImagePage.fileIsShared():
+ pywikibot.output(u'File is already on Commons.')
+ continue
+ sha1 = localImagePage.getFileSHA1Sum()
+ if self.getOption('use_hash'):
+ filenameOnCommons = images_list[1]
+ else:
+ filenameOnCommons = self.findFilenameOnCommons(
+ localImagePage)
+ if not filenameOnCommons and not self.getOption('use_hash'):
+ pywikibot.output(u'NowCommons template not found.')
+ continue
+ commonsImagePage = pywikibot.FilePage(commons, 'Image:%s' %
+ filenameOnCommons)
+ if localImagePage.title(withNamespace=False) == \
+ commonsImagePage.title(withNamespace=False) and \
+ self.getOption('use_hash'):
+ pywikibot.output(
+ u'The local and the commons images have the same name')
+ if localImagePage.title(withNamespace=False) != \
+ commonsImagePage.title(withNamespace=False):
+ usingPages = list(localImagePage.usingPages())
+ if usingPages and usingPages != [localImagePage]:
+ pywikibot.output(
+ u'\"\03{lightred}%s\03{default}\" is\
+ still used in %i pages.'
+ % (localImagePage.title(withNamespace=False),
+ len(usingPages)))
+ if self.getOption('replace') is True:
+ pywikibot.output(
+ u'Replacing
\"\03{lightred}%s\03{default}\" by \
+ \"\03{lightgreen}%s\03{default}\".'
+ % (localImagePage.title(
+ withNamespace=False),
+ commonsImagePage.title(
+ withNamespace=False)))
+ oImageRobot = image.ImageRobot(
+ pg.FileLinksGenerator(localImagePage),
+ localImagePage.title(withNamespace=False),
+ commonsImagePage.title(
+ withNamespace=False),
+ '', self.getOption('replacealways'),
+ self.getOption('replaceloose'))
+ oImageRobot.run()
+ # If the image is used with the urlname the
+ # previous function won't work
+ if len(list(pywikibot.FilePage(self.site,
+ page.title()).usingPages())) > 0 and \
+ self.getOption('replaceloose'):
+ oImageRobot = image.ImageRobot(
+ pg.FileLinksGenerator(
+ localImagePage),
+ localImagePage.title(
+ withNamespace=False, asUrl=True),
+ commonsImagePage.title(
+ withNamespace=False),
+ '', self.getOption('replacealways'),
+ self.getOption('replaceloose'))
+ oImageRobot.run()
+ # refresh because we want the updated list
+ usingPages = len(list(pywikibot.FilePage(
+ self.site, page.title()).usingPages()))
+ if usingPages > 0 and \
+ self.getOption('use_hash'):
+ # just an enter
+ pywikibot.input(
+ u'There are still %s pages with this \
+ image, confirm the manual\
+ removal from them please.\
+ ' % usingPages)
+
+ else:
+ pywikibot.output(u'Please change them manually.')
+ continue
+ else:
+ pywikibot.output(
+ u'No page is using \
+ "\03{lightgreen}%s\03{default}\" anymore.'
+ % localImagePage.title(withNamespace=False))
+ commonsText = commonsImagePage.get()
+ if self.getOption('replaceonly') is False:
+ if sha1 == commonsImagePage.getFileSHA1Sum():
+ pywikibot.output(
+ u'The image is identical to the one on Commons.')
+ if len(localImagePage.getFileVersionHistory()) > 1 and
\
+ not self.getOption('use_hash'):
+ pywikibot.output(
+ u"This image has a version history. Please \
+ delete it manually after making sure that the \
+ old versions are not worth keeping.""")
+ continue
+ if self.getOption('always') is False:
+ pywikibot.output(
+ u'\n\n>>>> Description on\
+ \03{lightpurple}%s\03{default} <<<<\n'
+ % page.title())
+ pywikibot.output(localImagePage.get())
+ pywikibot.output(
+ u'\n\n>>>> Description on\
+ \03{lightpurple}%s\03{default} <<<<\n'
+ % commonsImagePage.title())
+ pywikibot.output(commonsText)
+ if pywikibot.input_yn(
+ u'Does the description on Commons contain '
+ 'all required source and license\n'
+ 'information?',
+ default=False, automatic_quit=False):
+ localImagePage.delete('%s[[:commons:Image:%s]]\
+ ' % (comment, filenameOnCommons
+ ), prompt=False)
+ else:
+ localImagePage.delete(
+ comment + ' [[:commons:Image:%s]]'
+ % filenameOnCommons, prompt=False)
+ else:
+ pywikibot.output(u'The image is not identical to the\
+ one on Commons.')
+ except (pywikibot.NoPage, pywikibot.IsRedirectPage) as e:
+ pywikibot.output(u'%s' % e[0])
+ continue
+
+
+def main(*args):
+ """
+ Process command line arguments and invoke bot.
+
+ If args is an empty list, sys.argv is used.
+
+ @param args: command line arguments
+ @type args: list of unicode
+ """
+ generator = None
+ # Load a lot of default generators
+ genFactory = pagegenerators.GeneratorFactory()
+
+ for arg in pywikibot.handleArgs():
+ genFactory.handleArg(arg)
+
+ generator = genFactory.getCombinedGenerator()
+ if not generator:
+ raise NoEnoughData(
+ 'You have to specify the generator you want to use for the
script!')
+
+ pregenerator = pagegenerators.PreloadingGenerator(generator)
+ for page in pregenerator:
+ if page.exists() and page.namespace() == 6 and \
+ not page.isRedirectPage():
+ tagNowCommons(page)
+
+
+ options = {}
+
+ for arg in pywikibot.handle_args(args):
+ if arg.startswith('-') and \
+ arg[1:] in ('always', 'replace', 'replaceloose', 'replaceonly'):
+ options[arg[1:]] = True
+ elif arg == '-replacealways':
+ options['replace'] = True
+ options['replacealways'] = True
+ elif arg == '-hash':
+ options['use_hash'] = True
+ elif arg == '-autonomous':
+ pywikibot.warning(u"The '-autonomous' argument is DEPRECATED,"
+ u" use '-always' instead.")
+ options['always'] = True
+ bot = NowCommonsDeleteBot(**options)
+ bot.run()
+
+# Run the script only when it is executed directly, not when it is imported.
+if __name__ == "__main__":
+ main()
--
To view, visit https://gerrit.wikimedia.org/r/181354
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie14931b782afff92c079ec92d5808bf28a368db1
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Prianka <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits