Ladsgroup has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/227454

Change subject: [WIP] add scripts/interwikidata.py
......................................................................

[WIP] add scripts/interwikidata.py

It is the equivalent of interwiki.py for wikis that use Wikibase.

Change-Id: Ibbb7047d7e6be7b997577b2ea5d662bd6a361af8
---
A scripts/interwikidata.py
1 file changed, 228 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/54/227454/1

diff --git a/scripts/interwikidata.py b/scripts/interwikidata.py
new file mode 100644
index 0000000..cfcc794
--- /dev/null
+++ b/scripts/interwikidata.py
@@ -0,0 +1,228 @@
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+"""
+Script to handle interwiki based on Wikidata.
+
+These command line parameters can be used to specify which pages to work on:
+
+&params;
+
+Furthermore, the following command line parameters are supported:
+
+-langs:             Languages to work on.
+
+-cleanall:          Clean all old interwiki from pages in all languages
+-clean:             Clean only determined languages (e.g. -clean:fa,en,de)
+
+-createall:         Create item in Wikidata when no interwiki could be found.
+                        In all languages.
+-create:            Create item only in languages determined
+                        (e.g. -create:en,de)
+"""
+
+# (C) Pywikibot team, 2015
+#
+# Distributed under the terms of the MIT license.
+#
+from __future__ import unicode_literals
+
+__version__ = '$Id$'
+#
+
+import sys
+
+import pywikibot
+from pywikibot import pagegenerators, CurrentPageBot
+
+# This is required for the text that is shown when you run this script
+# with the parameter -help.
+docuReplacements = {
+    '&params;':     pagegenerators.parameterHelp,
+}
+
+tems = {
+    'en': ['Db-meta', 'Article for deletion', 'Proposed deletion'],
+    'nl': [u'Nuweg', u'Artikelweg'],
+    'de': [u'Löschen'],
+    'sv': [u"SFFR"],
+    'fr': [u"Suppression"],
+    'it': [u"Cancellazione"],
+    'ru': [u"Db-meta"],
+    'fa': [u"Db-meta"],
+    'es': [u"Cdb", u"Propb"],
+    'ckb': [u'Db'],
+    'ja': [u"Sakujo"],
+    'vi': [u"Db-meta", u"Mời biểu quyết", u"Proposed deletion"],
+    'pt': [u"Apagar", u"ESR"],
+    'zh': [u"Afd"],
+    'ca': [u"Supressió diferida"],
+    'no': [],
+    'sh': ['Db-meta'],
+    'fi': [u"Poistokeskustelu"],
+    'cs': [u"AfD"]}
+
+summaries = {
+    'fa': u'ربات: حذف پیوندهای میان‌ویکی که در ویکی‌داده موجود است.',
+    'en': u'Bot: Cleaning up interwiki',
+}
+
+langid = {
+    'en': 'Q328',
+    'sv': 'Q169514',
+    'de': 'Q48183',
+    'it': 'Q11920',
+    'no': 'Q191769',
+    'fa': 'Q48952',
+    'es': 'Q8449',
+    'pl': 'Q1551807',
+    'ca': 'Q199693',
+    'fr': 'Q8447',
+    'nl': 'Q10000',
+    'pt': 'Q11921',
+    'ru': 'Q206855',
+    'vi': 'Q200180',
+    'be': 'Q877583',
+    'uk': 'Q199698',
+    'tr': 'Q58255',
+    'cs': 'Q191168',
+    'sh': 'Q58679',
+}
+
+reg = {
+    'en': ["\[\[Category\:Living people", "\[\[[Cc]ategory\:\d{1,4} births"],
+    'de': ["DONTMATCHSDFSG", "\[\[[Kk]ategorie\:Geboren \d{1,4}"],
+    'nl': ["DONTMATCHSDFSG", "DONTMATCHSDFSG"],
+    'sv': [u"\[\[[Kk]ategori\:Levande personer", u"\[\[[Kk]ategori\:Födda 
\d{1,4}"],
+    'fr': [u"\[\[DONTMATCHSDFSG", u"\[\[[Cc]atégorie\:Naissance en \d{1,4}"],
+    'it': [u"\[\[[Cc]ategoria\:Persone viventi", u"\[\[[Cc]ategoria\:Nati nel 
\d{1,4}"],
+    'ru': [u"\[\[Категория\:Ныне живущие", u"\[\[Категория\:Родившиеся в 
\d{1,4} году"],
+    'es': [u"\[\[[Cc]ategoría\:Personas vivas", u"\[\[[Cc]ategoría\:Nacidos en 
\d{1,4}"],
+    'pl': [u"\[\[DONTMATCHSDFSG", u"\[\[[Kk]ategoria\:Urodzeni w \d{1,4}"],
+    'ja': [u"\[\[Category\:存命人物", u"\[\[Category\:\d{1,4}年生"],
+    'vi': [u"\[\[Thể loại\:Nhân vật còn sống", u"\[\[Thể loại\:Sinh \d{1,4}"],
+    'pt': [u"\[\[Categoria\:Pessoas vivas", u"\[\[Categoria\:Nascidos em 
\d{1,4}"],
+    'zh': [u"\[\[Category\:在世人物", u"\[\[Category\:\d{1,4}年出生"],
+    'ca': [u"\[\[Categoria\:Persones vives", u"\[\[DONTMATCHSDFSG"],
+    'no': [u"\[\[Kategori\:Biografier om levende personer", 
u"\[\[Kategori\:Fødsler i \d{1,4}"],
+    'fi': [u"\[\[Luokka\:Elävät henkilöt", u"\[\[Luokka\:Vuonna \d{1,4} 
syntyneet"],
+    'cs': [u"\[\[Kategorie\:Žijící lidé", u"\[\[Kategorie\:Narození \d{1,4}"],
+    'sh': [u"\[\[Kategorija\:Rođeni \d{1,4}\.", u"\[\[Kategorija\:Žive 
ličnosti", u"\[\[Kategorija\:Umrli \d{1,4}\."],
+    'fa': [u"\[\[رده\:افراد زنده", u"\[\[رده\: زادگان "],
+}
+
+
+class IWBot(CurrentPageBot):
+    """docstring for IWBot"""
+    def __init__(self, gen, site, clean=False, create=False):
+        super(IWBot, self).__init__(generator=gen)
+        self.clean = clean
+        self.create = create
+        self.repo = site.data_repository()
+
+    def treat_page(self):
+        if not self.current_page.exists():
+            pywikibot.output('%s does not exist, skipping...'
+                             % self.current_page.title())
+            return
+        try:
+            item = pywikibot.ItemPage.fromPage(self.current_page)
+        except:
+            item = self.try_to_add()
+            if self.create and not item:
+                item = self.create_item()
+        if not item:
+            return
+        self.current_item = item
+        self.add_statements()
+        if self.clean:
+            self.clean_page()
+
+    def create_item(self):
+        #TODO: Labeling
+        pywikibot.output('Creating item...')
+        item = pywikibot.ItemPage(self.repo)
+        db_name = self.current_page.site.dbName()
+        title = self.current_page.title()
+        data = {"sitelinks":
+                {db_name: {"site": db_name, "title": title}}}
+        item.editEntity(data, new='item')
+        return item
+
+    def add_statements(self):
+        pass
+
+    def handle_complicated(self):
+        return False
+
+    def clean_page(self):
+        iwlangs = pywikibot.textlib.getLanguageLinks(self.current_page.text)
+        if not iwlangs:
+            return
+        is_complicated = False
+        for iw_site in iwlangs:
+            if not iw_site.dbName() in self.current_item.sitelinks:
+                is_complicated = True
+        if is_complicated:
+            res = self.handle_complicated()
+        else:
+            res = True
+        if res:
+            pywikibot.output('Cleaning up the page')
+            self.current_page.text = 
pywikibot.textlib.removeLanguageLinks(self.current_page.text)
+            # pywikibot.i18n.translate(self.current_page.site.code, summaries, 
fallback=True)
+            # self.current_page.site.code
+            self.current_page.put(self.current_page.text, 
summary=summaries.get('eg', summaries['en']))
+
+    def try_to_add(self):
+        iwlangs = pywikibot.textlib.getLanguageLinks(self.current_page.text)
+        if not iwlangs:
+            return False
+        wd_data = set()
+        for iw_site in iwlangs:
+            wd_data.add(pywikibot.ItemPage.fromPage(iwlangs[iw_site]))
+        if len(wd_data) != 1:
+            return False
+        self.current_item = list(wd_data)[0]
+        pywikibot.output('Adding link to %s' % self.current_item.title())
+        self.current_item.setSitelink(self.current_page)
+        # I know this is stupid
+        return self.current_item
+
+
+def main(*args):
+    gen = None
+    clean = []
+    create = []
+    langs = []
+    local_args = pywikibot.handle_args(args)
+    genFactory = pagegenerators.GeneratorFactory()
+    for arg in local_args:
+        if arg.startswith('-langs:'):
+            langs = arg[7:].split(',')
+        elif arg.startswith('-cleanall'):
+            clean = True
+        elif arg.startswith('-clean:'):
+            clean = arg[7:].split(',')
+        elif arg.startswith('-createall'):
+            create = True
+        elif arg.startswith('-create:'):
+            create = arg[8:].split(',')
+        else:
+            genFactory.handleArg(arg)
+    if not langs:
+        langs = [pywikibot.Site().code]
+    for lang in langs:
+        site = pywikibot.Site(lang)
+        create_this = create is True or lang in create
+        clean_this = clean is True or lang in clean
+        if not gen:
+            gen = genFactory.getCombinedGenerator()
+        if gen:
+            gen = pagegenerators.PreloadingGenerator(gen)
+            bot = IWBot(gen, site, clean_this, create_this)
+            bot.run()
+        else:
+            pywikibot.showhelp()
+            sys.exit()
+
+main()

-- 
To view, visit https://gerrit.wikimedia.org/r/227454
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ibbb7047d7e6be7b997577b2ea5d662bd6a361af8
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Ladsgroup <ladsgr...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to