Alex Monk has uploaded a new change for review. https://gerrit.wikimedia.org/r/304425
Change subject: Fixes and improvements for maintain-meta_p ...................................................................... Fixes and improvements for maintain-meta_p * Update dblists path * Update for removal of visualeditor-default.dblist * Update for PHP short array syntax when we process InitialiseSettings.php * Change code used to contact wiki API endpoints so we're not caught by the uk.wikimedia.org -> wikimedia.org.uk external 301 * Add a dry run mode, default to on Change-Id: Iacc13766871f5b77bf29346e0fa68c64b0c44f17 --- M maintain-replicas/maintain-meta_p.py 1 file changed, 99 insertions(+), 33 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/software refs/changes/25/304425/1 diff --git a/maintain-replicas/maintain-meta_p.py b/maintain-replicas/maintain-meta_p.py index b3b8998..40008e2 100644 --- a/maintain-replicas/maintain-meta_p.py +++ b/maintain-replicas/maintain-meta_p.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- # Based on work by Marc-André Pelletier, ported to Python by Alex Monk +# Copyright © 2016 Alex Monk <[email protected]> # Copyright © 2015 Alex Monk <[email protected]> # Copyright © 2013 Marc-André Pelletier <[email protected]> # @@ -35,6 +36,7 @@ ('labsdb1002.eqiad.wmnet', 3306), ('labsdb1003.eqiad.wmnet', 3306) ] +DRY_RUN = True import codecs import collections @@ -44,7 +46,6 @@ import pymysql import re import subprocess -import urllib import urllib.request config = ConfigParser() @@ -54,9 +55,9 @@ subprocess.call(["git", "pull"], cwd = "mediawiki-config") -dbs = {db : {} for db in open('mediawiki-config/all.dblist').read().splitlines()} +dbs = {db : {"has_visualeditor": True} for db in open('mediawiki-config/dblists/all.dblist').read().splitlines()} def read_list(listFname, prop, val): - for db in open('mediawiki-config/' + listFname + '.dblist').read().splitlines(): + for db in open('mediawiki-config/dblists/' + listFname + '.dblist').read().splitlines(): if db in dbs: dbs[db][prop] = val @@ -68,7 +69,7 @@ read_list("private", "private", True) read_list("special", "family", "special") read_list("flaggedrevs", "has_flaggedrevs", True) -read_list("visualeditor-default", "has_visualeditor", True) +read_list("visualeditor-nondefault", "has_visualeditor", False) read_list("wikidataclient", "has_wikidata", True) for slice in ['s1', 's2', 's3', 's4', 's5', 's6', 's7']: # TODO: silver @@ -88,9 +89,9 @@ inCanonConfig = False canonical = {} for line in open('mediawiki-config/wmf-config/InitialiseSettings.php').read().splitlines(): - if line == "'wgCanonicalServer' => array(": + if line == "'wgCanonicalServer' => [": inCanonConfig = True - elif inCanonConfig and line == "),": + elif inCanonConfig and line == "],": inCanonConfig = False else: matches = re.match("^\s+'(.*)'\s+=>\s+'(.*)'\s*,\s*$", line) @@ -103,6 +104,14 @@ cached = json.load(cacheFile) except IOError as e: pass + + +class RedirectFilter(urllib.request.HTTPRedirectHandler): + def redirect_request(self, req, fp, code, msg, hdrs, newurl): + return None # do not redirect, let it error so we can catch it + +opener = urllib.request.build_opener(RedirectFilter) +opener.addheaders = [('User-agent', 'operations/software.git maintain-meta_p.py')] for db, dbInfo in dbs.items(): if 'private' in dbInfo and dbInfo['private']: @@ -126,28 +135,44 @@ dbInfo['lang'] = cached[canon]['lang'] dbInfo['name'] = cached[canon]['name'] else: + cached[canon] = {} logging.info("Querying " + canon + "...") try: - req = urllib.request.Request(canon + "/w/api.php?action=query&meta=siteinfo&siprop=general&format=json") - req.add_header("User-Agent", "operations/software.git maintain-meta_p.py") + url = canon + "/w/api.php?action=query&meta=siteinfo&siprop=general&format=json" - with urllib.request.urlopen(req) as response: + with opener.open(url) as response: result = json.load(codecs.getreader("utf-8")(response))['query'] cached[canon]['lang'] = dbInfo['lang'] = result['general']['lang'] cached[canon]['name'] = dbInfo['name'] = result['general']['sitename'] except Exception as e: - logging.exception(e) + if 'getcode' in dir(e) and e.getcode() == 301: + # It tried to redirect us? Um. + cached[canon]['lang'] = dbInfo['lang'] = None + cached[canon]['name'] = dbInfo['name'] = None + else: + logging.exception(e) with open('wiki-cache.json', 'w') as cacheFile: json.dump(cached, cacheFile) + +insertion_query = """INSERT INTO meta_p.wiki + (has_flaggedrevs, has_visualeditor, has_wikidata, is_closed, is_sensitive, dbname, slice, + url, family, lang, name, size) + VALUES (%(has_flaggedrevs)s, %(has_visualeditor)s, %(has_wikidata)s, %(is_closed)s, + %(is_sensitive)s, %(dbname)s, %(slice)s, %(url)s, %(family)s, %(lang)s, + %(name)s, %(size)s);""" for dbhost, dbport in slices: dbh = pymysql.connect(host=dbhost, port=dbport, user=dbuser, passwd=dbpassword, charset='utf8') cursor = dbh.cursor() logging.info("Update/create meta tables on", dbhost + "...") - cursor.execute("CREATE DATABASE IF NOT EXISTS meta_p DEFAULT CHARACTER SET utf8;") - cursor.execute("""CREATE TABLE IF NOT EXISTS meta_p.wiki ( + query = "CREATE DATABASE IF NOT EXISTS meta_p DEFAULT CHARACTER SET utf8;" + if DRY_RUN: + print("Running " + query) + else: + cursor.execute(query) + query = """CREATE TABLE IF NOT EXISTS meta_p.wiki ( dbname varchar(32) PRIMARY KEY, lang varchar(12) NOT NULL DEFAULT 'en', name text, @@ -160,16 +185,36 @@ has_flaggedrevs numeric(1) NOT NULL DEFAULT 0, has_visualeditor numeric(1) NOT NULL DEFAULT 0, has_wikidata numeric(1) NOT NULL DEFAULT 0, - is_sensitive numeric(1) NOT NULL DEFAULT 0);""") - cursor.execute("""CREATE OR REPLACE VIEW meta_p.legacy AS + is_sensitive numeric(1) NOT NULL DEFAULT 0);""" + if DRY_RUN: + print("Running " + query) + else: + cursor.execute(query) + query = """CREATE OR REPLACE VIEW meta_p.legacy AS SELECT dbname, lang, family, NULL AS domain, size, 0 AS is_meta, is_closed, 0 AS is_multilang, (family='wiktionary') AS is_sensitive, NULL AS root_category, slice AS server, '/w/' AS script_path - FROM meta_p.wiki;""") - cursor.execute("""CREATE TABLE IF NOT EXISTS meta_p.properties_anon_whitelist ( - pw_property varbinary(255) PRIMARY KEY);""") - cursor.execute("START TRANSACTION;") - cursor.execute("TRUNCATE meta_p.wiki;") + FROM meta_p.wiki;""" + if DRY_RUN: + print("Running " + query) + else: + cursor.execute(query) + query = """CREATE TABLE IF NOT EXISTS meta_p.properties_anon_whitelist ( + pw_property varbinary(255) PRIMARY KEY);""" + if DRY_RUN: + print("Running " + query) + else: + cursor.execute(query) + query = "START TRANSACTION;" + if DRY_RUN: + print("Running " + query) + else: + cursor.execute(query) + query = "TRUNCATE meta_p.wiki;" + if DRY_RUN: + print("Running " + query) + else: + cursor.execute(query) for db, dbInfo in dbs.items(): if 'deleted' in dbInfo and dbInfo['deleted']: continue @@ -209,21 +254,42 @@ fields['name'] = dbInfo['name'] if 'size' in dbInfo: fields['size'] = dbInfo['size'] - cursor.execute( - "INSERT INTO meta_p.wiki " + - "(has_flaggedrevs, has_visualeditor, has_wikidata, is_closed, is_sensitive, dbname, slice, " + - "url, family, lang, name, size) " + - "VALUES (%(has_flaggedrevs)s, %(has_visualeditor)s, %(has_wikidata)s, %(is_closed)s, " + - "%(is_sensitive)s, %(dbname)s, %(slice)s, %(url)s, %(family)s, %(lang)s, " + - "%(name)s, %(size)s);", - fields - ) + query = """INSERT INTO meta_p.wiki + (has_flaggedrevs, has_visualeditor, has_wikidata, is_closed, is_sensitive, dbname, slice, + url, family, lang, name, size) + VALUES (%(has_flaggedrevs)s, %(has_visualeditor)s, %(has_wikidata)s, %(is_closed)s, + %(is_sensitive)s, %(dbname)s, %(slice)s, %(url)s, %(family)s, %(lang)s, + %(name)s, %(size)s);""" + if DRY_RUN: + print("Running insertion_query using " + str(fields)) + else: + cursor.execute(insertion_query, fields) - cursor.execute("COMMIT;") - cursor.execute("START TRANSACTION;") - cursor.execute("DELETE FROM meta_p.properties_anon_whitelist;") + query = "COMMIT;" + if DRY_RUN: + print("Running " + query) + else: + cursor.execute(query) + query = "START TRANSACTION;" + if DRY_RUN: + print("Running " + query) + else: + cursor.execute(query) + query = "DELETE FROM meta_p.properties_anon_whitelist;" + if DRY_RUN: + print("Running " + query) + else: + cursor.execute(query) # This is hardcoded for now - cursor.execute("INSERT INTO meta_p.properties_anon_whitelist VALUES ('gadget-%');") - cursor.execute("COMMIT;") + query = "INSERT INTO meta_p.properties_anon_whitelist VALUES ('gadget-%');" + if DRY_RUN: + print("Running " + query) + else: + cursor.execute(query) + query = "COMMIT;" + if DRY_RUN: + print("Running " + query) + else: + cursor.execute(query) logging.info("All done.") -- To view, visit https://gerrit.wikimedia.org/r/304425 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Iacc13766871f5b77bf29346e0fa68c64b0c44f17 Gerrit-PatchSet: 1 Gerrit-Project: operations/software Gerrit-Branch: master Gerrit-Owner: Alex Monk <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
