Ladsgroup has uploaded a new change for review. https://gerrit.wikimedia.org/r/227454
Change subject: [WIP] add scripts/interwikidata.py ...................................................................... [WIP] add scripts/interwikidata.py It's interwiki.py but for wikis which work with Wikibase. Change-Id: Ibbb7047d7e6be7b997577b2ea5d662bd6a361af8 --- A scripts/interwikidata.py 1 file changed, 228 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/54/227454/1 diff --git a/scripts/interwikidata.py b/scripts/interwikidata.py new file mode 100644 index 0000000..cfcc794 --- /dev/null +++ b/scripts/interwikidata.py @@ -0,0 +1,228 @@ +#!/usr/bin/python +# -*- coding: UTF-8 -*- +""" +Script to handle interwiki based on Wikidata. + +These command line parameters can be used to specify which pages to work on: + +&params; + +Furthermore, the following command line parameters are supported: + +-langs: Languages to work on. + +-cleanall: Clean all old interwiki from pages in all languages +-clean: Clean only determined languages (e.g. -clean:fa,en,de) + +-createall: Create item in Wikidata when no interwiki could be found. + In all languages. +-create: Create item only in languages determined + (e.g. -create:en,de) +""" + +# (C) Pywikibot team, 2015 +# +# Distributed under the terms of the MIT license. +# +from __future__ import unicode_literals + +__version__ = '$Id$' +# + +import sys + +import pywikibot +from pywikibot import pagegenerators, CurrentPageBot + +# This is required for the text that is shown when you run this script +# with the parameter -help. 
+docuReplacements = { + '&params;': pagegenerators.parameterHelp, +} + +tems = { + 'en': ['Db-meta', 'Article for deletion', 'Proposed deletion'], + 'nl': [u'Nuweg', u'Artikelweg'], + 'de': [u'Löschen'], + 'sv': [u"SFFR"], + 'fr': [u"Suppression"], + 'it': [u"Cancellazione"], + 'ru': [u"Db-meta"], + 'fa': [u"Db-meta"], + 'es': [u"Cdb", u"Propb"], + 'ckb': [u'Db'], + 'ja': [u"Sakujo"], + 'vi': [u"Db-meta", u"Mời biểu quyết", u"Proposed deletion"], + 'pt': [u"Apagar", u"ESR"], + 'zh': [u"Afd"], + 'ca': [u"Supressió diferida"], + 'no': [], + 'sh': ['Db-meta'], + 'fi': [u"Poistokeskustelu"], + 'cs': [u"AfD"]} + +summaries = { + 'fa': u'ربات: حذف پیوندهای میانویکی که در ویکیداده موجود است.', + 'en': u'Bot: Cleaning up interwiki', +} + +langid = { + 'en': 'Q328', + 'sv': 'Q169514', + 'de': 'Q48183', + 'it': 'Q11920', + 'no': 'Q191769', + 'fa': 'Q48952', + 'es': 'Q8449', + 'pl': 'Q1551807', + 'ca': 'Q199693', + 'fr': 'Q8447', + 'nl': 'Q10000', + 'pt': 'Q11921', + 'ru': 'Q206855', + 'vi': 'Q200180', + 'be': 'Q877583', + 'uk': 'Q199698', + 'tr': 'Q58255', + 'cs': 'Q191168', + 'sh': 'Q58679', +} + +reg = { + 'en': ["\[\[Category\:Living people", "\[\[[Cc]ategory\:\d{1,4} births"], + 'de': ["DONTMATCHSDFSG", "\[\[[Kk]ategorie\:Geboren \d{1,4}"], + 'nl': ["DONTMATCHSDFSG", "DONTMATCHSDFSG"], + 'sv': [u"\[\[[Kk]ategori\:Levande personer", u"\[\[[Kk]ategori\:Födda \d{1,4}"], + 'fr': [u"\[\[DONTMATCHSDFSG", u"\[\[[Cc]atégorie\:Naissance en \d{1,4}"], + 'it': [u"\[\[[Cc]ategoria\:Persone viventi", u"\[\[[Cc]ategoria\:Nati nel \d{1,4}"], + 'ru': [u"\[\[Категория\:Ныне живущие", u"\[\[Категория\:Родившиеся в \d{1,4} году"], + 'es': [u"\[\[[Cc]ategoría\:Personas vivas", u"\[\[[Cc]ategoría\:Nacidos en \d{1,4}"], + 'pl': [u"\[\[DONTMATCHSDFSG", u"\[\[[Kk]ategoria\:Urodzeni w \d{1,4}"], + 'ja': [u"\[\[Category\:存命人物", u"\[\[Category\:\d{1,4}年生"], + 'vi': [u"\[\[Thể loại\:Nhân vật còn sống", u"\[\[Thể loại\:Sinh \d{1,4}"], + 'pt': [u"\[\[Categoria\:Pessoas vivas", u"\[\[Categoria\:Nascidos 
em \d{1,4}"], + 'zh': [u"\[\[Category\:在世人物", u"\[\[Category\:\d{1,4}年出生"], + 'ca': [u"\[\[Categoria\:Persones vives", u"\[\[DONTMATCHSDFSG"], + 'no': [u"\[\[Kategori\:Biografier om levende personer", u"\[\[Kategori\:Fødsler i \d{1,4}"], + 'fi': [u"\[\[Luokka\:Elävät henkilöt", u"\[\[Luokka\:Vuonna \d{1,4} syntyneet"], + 'cs': [u"\[\[Kategorie\:Žijící lidé", u"\[\[Kategorie\:Narození \d{1,4}"], + 'sh': [u"\[\[Kategorija\:Rođeni \d{1,4}\.", u"\[\[Kategorija\:Žive ličnosti", u"\[\[Kategorija\:Umrli \d{1,4}\."], + 'fa': [u"\[\[رده\:افراد زنده", u"\[\[رده\: زادگان "], +} + + +class IWBot(CurrentPageBot): + """docstring for IWBot""" + def __init__(self, gen, site, clean=False, create=False): + super(IWBot, self).__init__(generator=gen) + self.clean = clean + self.create = create + self.repo = site.data_repository() + + def treat_page(self): + if not self.current_page.exists(): + pywikibot.output('%s does not exist, skipping...' + % self.current_page.title()) + return + try: + item = pywikibot.ItemPage.fromPage(self.current_page) + except: + item = self.try_to_add() + if self.create and not item: + item = self.create_item() + if not item: + return + self.current_item = item + self.add_statements() + if self.clean: + self.clean_page() + + def create_item(self): + #TODO: Labeling + pywikibot.output('Creating item...') + item = pywikibot.ItemPage(self.repo) + db_name = self.current_page.site.dbName() + title = self.current_page.title() + data = {"sitelinks": + {db_name: {"site": db_name, "title": title}}} + item.editEntity(data, new='item') + return item + + def add_statements(self): + pass + + def handle_complicated(self): + return False + + def clean_page(self): + iwlangs = pywikibot.textlib.getLanguageLinks(self.current_page.text) + if not iwlangs: + return + is_complicated = False + for iw_site in iwlangs: + if not iw_site.dbName() in self.current_item.sitelinks: + is_complicated = True + if is_complicated: + res = self.handle_complicated() + else: + res = True + if 
res: + pywikibot.output('Cleaning up the page') + self.current_page.text = pywikibot.textlib.removeLanguageLinks(self.current_page.text) + # pywikibot.i18n.translate(self.current_page.site.code, summaries, fallback=True) + # self.current_page.site.code + self.current_page.put(self.current_page.text, summary=summaries.get('eg', summaries['en'])) + + def try_to_add(self): + iwlangs = pywikibot.textlib.getLanguageLinks(self.current_page.text) + if not iwlangs: + return False + wd_data = set() + for iw_site in iwlangs: + wd_data.add(pywikibot.ItemPage.fromPage(iwlangs[iw_site])) + if len(wd_data) != 1: + return False + self.current_item = list(wd_data)[0] + pywikibot.output('Adding link to %s' % self.current_item.title()) + self.current_item.setSitelink(self.current_page) + # I know this is stupid + return self.current_item + + +def main(*args): + gen = None + clean = [] + create = [] + langs = [] + local_args = pywikibot.handle_args(args) + genFactory = pagegenerators.GeneratorFactory() + for arg in local_args: + if arg.startswith('-langs:'): + langs = arg[7:].split(',') + elif arg.startswith('-cleanall'): + clean = True + elif arg.startswith('-clean:'): + clean = arg[7:].split(',') + elif arg.startswith('-createall'): + create = True + elif arg.startswith('-create:'): + create = arg[8:].split(',') + else: + genFactory.handleArg(arg) + if not langs: + langs = [pywikibot.Site().code] + for lang in langs: + site = pywikibot.Site(lang) + create_this = create is True or lang in create + clean_this = clean is True or lang in clean + if not gen: + gen = genFactory.getCombinedGenerator() + if gen: + gen = pagegenerators.PreloadingGenerator(gen) + bot = IWBot(gen, site, clean_this, create_this) + bot.run() + else: + pywikibot.showhelp() + sys.exit() + +main() -- To view, visit https://gerrit.wikimedia.org/r/227454 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ibbb7047d7e6be7b997577b2ea5d662bd6a361af8 
Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: Ladsgroup <ladsgr...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits