Yuvipanda has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/57835


Change subject: Add support for executing Python code along with SQL for csv gen
......................................................................

Add support for executing Python code along with SQL for csv gen

Contains a general refactor to make things slightly objecty.
Everything is now picked up from the conf file, nothing hard coded

Also has a python script that provides a graph of user's mobile
upload count and general edit count. It needs to be a python
script instead of just SQL because it pulls in data from two
different databases on two different hosts.

Change-Id: If5d3b2f53dac5071738b4fee2162346d64cae409
---
M generate.py
M mobile/config.yaml
A mobile/uploaders-experience.py
3 files changed, 159 insertions(+), 38 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/limn-mobile-data 
refs/changes/35/57835/1

diff --git a/generate.py b/generate.py
index 020c0bd..8536272 100644
--- a/generate.py
+++ b/generate.py
@@ -1,51 +1,88 @@
 import os
 import sys
-import glob
+import csv
+import imp
+
+import yaml
 import MySQLdb as mysql
 from jinja2 import Template
-import yaml
-import csv
 
-conn = mysql.connect(
-    host=os.environ.get("STAT_HOST", "s1-analytics-slave.eqiad.wmnet"),
-    port=int(os.environ.get("STAT_PORT", 3306)),
-    read_default_file='/a/.my.cnf.research',
-    db="log"
-)
-#conn = mysql.connect("s1-analytics-slave.eqiad.wmnet", 
read_default_file=os.path.expanduser('~/.my.cnf.research'), db="log")
 
-def execute(sql):
-    cur = conn.cursor()
-    cur.execute(sql)
-    return cur
+class DataGenerator():
+    def __init__(self, folder_path):
+        self.folder_path = folder_path
+        self.config = yaml.load(open(os.path.join(self.folder_path, 
"config.yaml")))
+        self.connections = {}
 
-def render(template, env):
-    t = Template(template)
-    return t.render(**env)
+    def conn_for(self, name):
+        if name not in self.config['databases']:
+            raise ValueError("No such database %s" % name)
+
+        if name not in self.connections:
+            db = self.config['databases'][name]
+            self.connections[name] = mysql.connect(
+                    host=db['host'],
+                    port=db['port'],
+                    read_default_file=db['creds_file'],
+                    db=db['db'],
+                    charset='utf8',
+                    use_unicode=True
+                    )
+
+        return self.connections[name]
+
+    def render(self, template):
+        t = Template(template)
+        return t.render(**self.config)
+
+    def execute_sql(self, file_name, db_name):
+        sql = self.render(open(file_name).read())
+        conn = self.conn_for(db_name)
+        cursor = conn.cursor()
+        cursor.execute(sql)
+        rows = cursor.fetchall()
+
+        headers = [field[0] for field in cursor.description]
+
+        return (headers, rows)
+
+    def execute_python(self, name, file_path):
+        module = imp.load_source(name, file_path)
+        return module.execute(self)
+
+    def execute(self):
+
+        for key, value in self.config['graphs'].iteritems():
+            # Look for the sql first, then python
+            db_name = value.get('db', self.config['defaults']['db'])
+            headers = None
+            rows = None
+            file_path = None
+            if os.path.exists(os.path.join(folder, key + '.sql')):
+                file_path = os.path.join(folder, key + '.sql')
+                headers, rows = self.execute_sql(file_path, db_name)
+            elif os.path.exists(os.path.join(folder, key + '.py')):
+                file_path = os.path.join(folder, key + '.py')
+                headers, rows = self.execute_python(key, file_path)
+            else:
+                raise ValueError("Can not find SQL or Python for %s" % key)
+
+
+            print "Generating %s (%s)" % (value['title'], file_path)
+
+            csvOutput = open(os.path.join(self.config['output']['path'], key + 
'.csv'), 'w')
+            csvOutputWriter = csv.writer(csvOutput)
+
+            csvOutputWriter.writerow(headers)
+            csvOutputWriter.writerows(rows)
+
+            csvOutput.close()
 
 if __name__ == "__main__":
     if len(sys.argv) != 2: #FIXME: argparse please
         print "Usage: generate.py <folder with config.yaml and *.sql files>"
-        sys.exit(-1)
+        sys.exit(1)
 
     folder = sys.argv[1]
-    config = yaml.load(open(os.path.join(folder, "config.yaml")))
-    graphs = dict([
-        (   os.path.basename(filename).split(".")[0], 
-            render(open(filename).read(), config)
-        ) for filename in glob.glob(os.path.join(folder, "*.sql"))
-    ])
-    #url_fmt = 'http://stat1001.wikimedia.org/mobile-dashboard/%s'
-    for key, sql in graphs.items():
-        print "Generating %s" % key
-        cursor = execute(sql)
-        rows = cursor.fetchall()
-        
-        csvOutput = open(os.path.join('/a/limn-public-data/mobile/datafiles', 
key + '.csv'), 'w')
-        csvOutputWriter = csv.writer(csvOutput)
-
-        headers = [field[0] for field in cursor.description]
-        csvOutputWriter.writerow(headers)
-        csvOutputWriter.writerows(rows)
-
-        csvOutput.close()
+    dg = DataGenerator(folder)
+    dg.execute()
diff --git a/mobile/config.yaml b/mobile/config.yaml
index c242a7d..fae1c4c 100644
--- a/mobile/config.yaml
+++ b/mobile/config.yaml
@@ -1,3 +1,14 @@
+databases:
+    el:
+        host: "s1-analytics-slave.eqiad.wmnet"
+        port: 3306
+        creds_file: /a/.my.cnf.research
+        db: log
+    commons:
+        host: "s4-analytics-slave.eqiad.wmnet"
+        port: 3306
+        creds_file: /a/.my.cnf.research
+        db: commonswiki
 tables:
         upload_attempts: (SELECT id, uuid, clientIp, isTruncated, 
clientValidated, timestamp, webHost, wiki, event_appversion, event_device, 
event_filename, 0 as event_multiple, event_platform, event_result, 
event_source, event_username FROM MobileAppUploadAttempts_5257716 UNION SELECT 
* FROM MobileAppUploadAttempts_5334329) as MobileAppUploadAttempts
         login_attempts: MobileAppLoginAttempts_5257721
@@ -5,6 +16,10 @@
         upload_web: MobileWebUploads_5281063
 intervals:
         running_average: 30
+output:
+    path: /a/limn-public-data/mobile/datafiles
+defaults:
+    db: el
 graphs:
         unique-uploaders:
                 title: Unique uploaders (over last 30 days)
@@ -20,3 +35,5 @@
                 title: Successful Uploads (per day)
         share-attempts:
                 title: Share Attempts (per day)
+        uploaders-experience:
+                title: Mobile Uploads vs Commons Edit count
diff --git a/mobile/uploaders-experience.py b/mobile/uploaders-experience.py
new file mode 100644
index 0000000..cd38076
--- /dev/null
+++ b/mobile/uploaders-experience.py
@@ -0,0 +1,67 @@
+mobile_uploaders_android_sql = u"""
+SELECT  event_username, count(*) AS uploads
+FROM    {{tables.upload_attempts}}
+WHERE   event_platform LIKE 'Android%'
+        AND event_result="success"
+        AND wiki="commonswiki"
+GROUP BY event_username"""
+
+mobile_uploaders_ios_sql = u"""
+SELECT  event_username, count(*) AS uploads
+FROM    {{tables.upload_attempts}}
+WHERE   event_platform LIKE 'iOS%'
+        AND event_result="success"
+        AND wiki="commonswiki"
+GROUP BY event_username"""
+
+mobile_uploaders_web_sql = u"""
+SELECT  event_username, count(*) AS uploads
+FROM    {{tables.upload_web}}
+WHERE   event_action="success"
+        AND wiki != "testwiki"
+GROUP BY event_username"""
+
+edit_count_sql_template = u"""
+SELECT  user_name, user_editcount
+FROM    user
+WHERE   user_name in (%s)
+"""
+
+headers = ["source", "uploads", "edits"]
+def results_for(dg, sql, source):
+    uploads = {}
+
+    commons = dg.conn_for('commons')
+    el = dg.conn_for('el')
+
+    uploads_cursor = el.cursor()
+    uploads_cursor.execute(dg.render(sql))
+
+    for row in uploads_cursor:
+        username = row[0].strip()
+        username = username[0].upper() + username[1:]
+        # Default Edit Count to 0. Happens only if username is not found in 
commons
+        # Only case seems to be PhilK10, super early Commons iOS app tester 
who has
+        # An account on testwiki, but not on Commons
+        # The earliest version of the iOS app didn't differentiate between 
test and commons
+        uploads[username] = [source, row[1], 0]
+
+    edit_count_sql = edit_count_sql_template % ('%s,' * 
len(uploads.keys()))[:-1]
+    edits_cursor = commons.cursor()
+    edits_cursor.execute(edit_count_sql, uploads.keys())
+
+    for row in edits_cursor:
+        username = row[0].strip().decode('utf8')
+        username = username[0].upper() + username[1:]
+        uploads[username][-1] = row[1]
+    return uploads.values()
+
+def execute(dg):
+
+    results = []
+    results.extend(results_for(dg, mobile_uploaders_ios_sql, "iOS"))
+    results.extend(results_for(dg, mobile_uploaders_android_sql, "Android"))
+    # Commenting out, because web doesn't track username. UGH
+    #results.extend(results_for(dg, mobile_uploaders_web_sql, "Web"))
+
+    return (headers, results)

-- 
To view, visit https://gerrit.wikimedia.org/r/57835
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If5d3b2f53dac5071738b4fee2162346d64cae409
Gerrit-PatchSet: 1
Gerrit-Project: analytics/limn-mobile-data
Gerrit-Branch: master
Gerrit-Owner: Yuvipanda <yuvipa...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to