Alex Monk has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/304425

Change subject: Fixes and improvements for maintain-meta_p
......................................................................

Fixes and improvements for maintain-meta_p

* Update dblists path
* Update for removal of visualeditor-default.dblist
* Update for PHP short array syntax when we process InitialiseSettings.php
* Change the code used to contact wiki API endpoints so that we no longer
  follow the external uk.wikimedia.org -> wikimedia.org.uk 301 redirect
* Add a dry-run mode, enabled by default

Change-Id: Iacc13766871f5b77bf29346e0fa68c64b0c44f17
---
M maintain-replicas/maintain-meta_p.py
1 file changed, 99 insertions(+), 33 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/software 
refs/changes/25/304425/1

diff --git a/maintain-replicas/maintain-meta_p.py 
b/maintain-replicas/maintain-meta_p.py
index b3b8998..40008e2 100644
--- a/maintain-replicas/maintain-meta_p.py
+++ b/maintain-replicas/maintain-meta_p.py
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 
 #  Based on work by Marc-André Pelletier, ported to Python by Alex Monk
+#  Copyright © 2016 Alex Monk <[email protected]>
 #  Copyright © 2015 Alex Monk <[email protected]>
 #  Copyright © 2013 Marc-André Pelletier <[email protected]>
 #
@@ -35,6 +36,7 @@
     ('labsdb1002.eqiad.wmnet', 3306),
     ('labsdb1003.eqiad.wmnet', 3306)
 ]
+DRY_RUN = True
 
 import codecs
 import collections
@@ -44,7 +46,6 @@
 import pymysql
 import re
 import subprocess
-import urllib
 import urllib.request
 
 config = ConfigParser()
@@ -54,9 +55,9 @@
 
 subprocess.call(["git", "pull"], cwd = "mediawiki-config")
 
-dbs = {db : {} for db in 
open('mediawiki-config/all.dblist').read().splitlines()}
+dbs = {db : {"has_visualeditor": True} for db in 
open('mediawiki-config/dblists/all.dblist').read().splitlines()}
 def read_list(listFname, prop, val):
-    for db in open('mediawiki-config/' + listFname + 
'.dblist').read().splitlines():
+    for db in open('mediawiki-config/dblists/' + listFname + 
'.dblist').read().splitlines():
         if db in dbs:
             dbs[db][prop] = val
 
@@ -68,7 +69,7 @@
 read_list("private", "private", True)
 read_list("special", "family", "special")
 read_list("flaggedrevs", "has_flaggedrevs", True)
-read_list("visualeditor-default", "has_visualeditor", True)
+read_list("visualeditor-nondefault", "has_visualeditor", False)
 read_list("wikidataclient", "has_wikidata", True)
 
 for slice in ['s1', 's2', 's3', 's4', 's5', 's6', 's7']: # TODO: silver
@@ -88,9 +89,9 @@
 inCanonConfig = False
 canonical = {}
 for line in 
open('mediawiki-config/wmf-config/InitialiseSettings.php').read().splitlines():
-    if line == "'wgCanonicalServer' => array(":
+    if line == "'wgCanonicalServer' => [":
         inCanonConfig = True
-    elif inCanonConfig and line == "),":
+    elif inCanonConfig and line == "],":
         inCanonConfig = False
     else:
         matches = re.match("^\s+'(.*)'\s+=>\s+'(.*)'\s*,\s*$", line)
@@ -103,6 +104,14 @@
         cached = json.load(cacheFile)
 except IOError as e:
     pass
+
+
+class RedirectFilter(urllib.request.HTTPRedirectHandler):
+    def redirect_request(self, req, fp, code, msg, hdrs, newurl):
+        return None # do not redirect, let it error so we can catch it
+
+opener = urllib.request.build_opener(RedirectFilter)
+opener.addheaders = [('User-agent', 'operations/software.git 
maintain-meta_p.py')]
 
 for db, dbInfo in dbs.items():
     if 'private' in dbInfo and dbInfo['private']:
@@ -126,28 +135,44 @@
             dbInfo['lang'] = cached[canon]['lang']
             dbInfo['name'] = cached[canon]['name']
         else:
+            cached[canon] = {}
             logging.info("Querying " + canon + "...")
             try:
-                req = urllib.request.Request(canon + 
"/w/api.php?action=query&meta=siteinfo&siprop=general&format=json")
-                req.add_header("User-Agent", "operations/software.git 
maintain-meta_p.py")
+                url = canon + 
"/w/api.php?action=query&meta=siteinfo&siprop=general&format=json"
 
-                with urllib.request.urlopen(req) as response:
+                with opener.open(url) as response:
                     result = 
json.load(codecs.getreader("utf-8")(response))['query']
                     cached[canon]['lang'] = dbInfo['lang'] = 
result['general']['lang']
                     cached[canon]['name'] = dbInfo['name'] = 
result['general']['sitename']
             except Exception as e:
-                logging.exception(e)
+                if 'getcode' in dir(e) and e.getcode() == 301:
+                    # It tried to redirect us? Um.
+                    cached[canon]['lang'] = dbInfo['lang'] = None
+                    cached[canon]['name'] = dbInfo['name'] = None
+                else:
+                    logging.exception(e)
 
 with open('wiki-cache.json', 'w') as cacheFile:
     json.dump(cached, cacheFile)
+
+insertion_query = """INSERT INTO meta_p.wiki
+            (has_flaggedrevs, has_visualeditor, has_wikidata, is_closed, 
is_sensitive, dbname, slice,
+                url, family, lang, name, size)
+            VALUES (%(has_flaggedrevs)s, %(has_visualeditor)s, 
%(has_wikidata)s, %(is_closed)s,
+                %(is_sensitive)s, %(dbname)s, %(slice)s, %(url)s, %(family)s, 
%(lang)s,
+                %(name)s, %(size)s);"""
 
 for dbhost, dbport in slices:
     dbh = pymysql.connect(host=dbhost, port=dbport, user=dbuser, 
passwd=dbpassword, charset='utf8')
     cursor = dbh.cursor()
 
     logging.info("Update/create meta tables on", dbhost + "...")
-    cursor.execute("CREATE DATABASE IF NOT EXISTS meta_p DEFAULT CHARACTER SET 
utf8;")
-    cursor.execute("""CREATE TABLE IF NOT EXISTS meta_p.wiki (
+    query = "CREATE DATABASE IF NOT EXISTS meta_p DEFAULT CHARACTER SET utf8;"
+    if DRY_RUN:
+        print("Running " + query)
+    else:
+        cursor.execute(query)
+    query = """CREATE TABLE IF NOT EXISTS meta_p.wiki (
         dbname varchar(32) PRIMARY KEY,
         lang varchar(12) NOT NULL DEFAULT 'en',
         name text,
@@ -160,16 +185,36 @@
         has_flaggedrevs numeric(1) NOT NULL DEFAULT 0,
         has_visualeditor numeric(1) NOT NULL DEFAULT 0,
         has_wikidata numeric(1) NOT NULL DEFAULT 0,
-        is_sensitive numeric(1) NOT NULL DEFAULT 0);""")
-    cursor.execute("""CREATE OR REPLACE VIEW meta_p.legacy AS
+        is_sensitive numeric(1) NOT NULL DEFAULT 0);"""
+    if DRY_RUN:
+        print("Running " + query)
+    else:
+        cursor.execute(query)
+    query = """CREATE OR REPLACE VIEW meta_p.legacy AS
         SELECT dbname, lang, family, NULL AS domain, size, 0 AS is_meta,
                is_closed, 0 AS is_multilang, (family='wiktionary') AS 
is_sensitive,
                NULL AS root_category, slice AS server, '/w/' AS script_path
-            FROM meta_p.wiki;""")
-    cursor.execute("""CREATE TABLE IF NOT EXISTS 
meta_p.properties_anon_whitelist (
-        pw_property varbinary(255) PRIMARY KEY);""")
-    cursor.execute("START TRANSACTION;")
-    cursor.execute("TRUNCATE meta_p.wiki;")
+            FROM meta_p.wiki;"""
+    if DRY_RUN:
+        print("Running " + query)
+    else:
+        cursor.execute(query)
+    query = """CREATE TABLE IF NOT EXISTS meta_p.properties_anon_whitelist (
+        pw_property varbinary(255) PRIMARY KEY);"""
+    if DRY_RUN:
+        print("Running " + query)
+    else:
+        cursor.execute(query)
+    query = "START TRANSACTION;"
+    if DRY_RUN:
+        print("Running " + query)
+    else:
+        cursor.execute(query)
+    query = "TRUNCATE meta_p.wiki;"
+    if DRY_RUN:
+        print("Running " + query)
+    else:
+        cursor.execute(query)
     for db, dbInfo in dbs.items():
         if 'deleted' in dbInfo and dbInfo['deleted']:
             continue
@@ -209,21 +254,42 @@
             fields['name'] = dbInfo['name']
         if 'size' in dbInfo:
             fields['size'] = dbInfo['size']
-        cursor.execute(
-            "INSERT INTO meta_p.wiki " +
-            "(has_flaggedrevs, has_visualeditor, has_wikidata, is_closed, 
is_sensitive, dbname, slice, " +
-                "url, family, lang, name, size) " +
-            "VALUES (%(has_flaggedrevs)s, %(has_visualeditor)s, 
%(has_wikidata)s, %(is_closed)s, " +
-                "%(is_sensitive)s, %(dbname)s, %(slice)s, %(url)s, %(family)s, 
%(lang)s, " +
-                "%(name)s, %(size)s);",
-            fields
-        )
+        query = """INSERT INTO meta_p.wiki
+            (has_flaggedrevs, has_visualeditor, has_wikidata, is_closed, 
is_sensitive, dbname, slice,
+                url, family, lang, name, size)
+            VALUES (%(has_flaggedrevs)s, %(has_visualeditor)s, 
%(has_wikidata)s, %(is_closed)s,
+                %(is_sensitive)s, %(dbname)s, %(slice)s, %(url)s, %(family)s, 
%(lang)s,
+                %(name)s, %(size)s);"""
+        if DRY_RUN:
+            print("Running insertion_query using " + str(fields))
+        else:
+            cursor.execute(insertion_query, fields)
 
-    cursor.execute("COMMIT;")
-    cursor.execute("START TRANSACTION;")
-    cursor.execute("DELETE FROM meta_p.properties_anon_whitelist;")
+    query = "COMMIT;"
+    if DRY_RUN:
+        print("Running " + query)
+    else:
+        cursor.execute(query)
+    query = "START TRANSACTION;"
+    if DRY_RUN:
+        print("Running " + query)
+    else:
+        cursor.execute(query)
+    query = "DELETE FROM meta_p.properties_anon_whitelist;"
+    if DRY_RUN:
+        print("Running " + query)
+    else:
+        cursor.execute(query)
     # This is hardcoded for now
-    cursor.execute("INSERT INTO meta_p.properties_anon_whitelist VALUES 
('gadget-%');")
-    cursor.execute("COMMIT;")
+    query = "INSERT INTO meta_p.properties_anon_whitelist VALUES ('gadget-%');"
+    if DRY_RUN:
+        print("Running " + query)
+    else:
+        cursor.execute(query)
+    query = "COMMIT;"
+    if DRY_RUN:
+        print("Running " + query)
+    else:
+        cursor.execute(query)
 
 logging.info("All done.")

-- 
To view, visit https://gerrit.wikimedia.org/r/304425
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iacc13766871f5b77bf29346e0fa68c64b0c44f17
Gerrit-PatchSet: 1
Gerrit-Project: operations/software
Gerrit-Branch: master
Gerrit-Owner: Alex Monk <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to