Yuvipanda has uploaded a new change for review. https://gerrit.wikimedia.org/r/57835
Change subject: Add support for executing Python code along with SQL for csv gen ...................................................................... Add support for executing Python code along with SQL for csv gen Contains a general refactor to make things slightly objecty. Everything is now picked up from the conf file, nothing hard coded Also has a python script that provides a graph of user's mobile upload count and general edit count. It needs to be a python script instead of just SQL because it pulls in data from two different databases on two different hosts. Change-Id: If5d3b2f53dac5071738b4fee2162346d64cae409 --- M generate.py M mobile/config.yaml A mobile/uploaders-experience.py 3 files changed, 159 insertions(+), 38 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/limn-mobile-data refs/changes/35/57835/1 diff --git a/generate.py b/generate.py index 020c0bd..8536272 100644 --- a/generate.py +++ b/generate.py @@ -1,51 +1,88 @@ import os import sys -import glob +import csv +import imp + +import yaml import MySQLdb as mysql from jinja2 import Template -import yaml -import csv -conn = mysql.connect( - host=os.environ.get("STAT_HOST", "s1-analytics-slave.eqiad.wmnet"), - port=int(os.environ.get("STAT_PORT", 3306)), - read_default_file='/a/.my.cnf.research', - db="log" -) -#conn = mysql.connect("s1-analytics-slave.eqiad.wmnet", read_default_file=os.path.expanduser('~/.my.cnf.research'), db="log") -def execute(sql): - cur = conn.cursor() - cur.execute(sql) - return cur +class DataGenerator(): + def __init__(self, folder_path): + self.folder_path = folder_path + self.config = yaml.load(open(os.path.join(self.folder_path, "config.yaml"))) + self.connections = {} -def render(template, env): - t = Template(template) - return t.render(**env) + def conn_for(self, name): + if name not in self.config['databases']: + raise ValueError("No such database %s" % name) + + if name not in self.connections: + db = self.config['databases'][name] + self.connections[name] = mysql.connect( + host=db['host'], + port=db['port'], + read_default_file=db['creds_file'], + db=db['db'], + charset='utf8', + use_unicode=True + ) + + return self.connections[name] + + def render(self, template): + t = Template(template) + return t.render(**self.config) + + def execute_sql(self, file_name, db_name): + sql = self.render(open(file_name).read()) + conn = self.conn_for(db_name) + cursor = conn.cursor() + cursor.execute(sql) + rows = cursor.fetchall() + + headers = [field[0] for field in cursor.description] + + return (headers, rows) + + def execute_python(self, name, file_path): + module = imp.load_source(name, file_path) + return module.execute(self) + + def execute(self): + + for key, value in self.config['graphs'].iteritems(): + # Look for the sql first, then python + db_name = value.get('db', self.config['defaults']['db']) + headers = None + rows = None + file_path = None + if os.path.exists(os.path.join(folder, key + '.sql')): + file_path = os.path.join(folder, key + '.sql') + headers, rows = self.execute_sql(file_path, db_name) + elif os.path.exists(os.path.join(folder, key + '.py')): + file_path = os.path.join(folder, key + '.py') + headers, rows = self.execute_python(key, file_path) + else: + raise ValueError("Can not find SQL or Python for %s" % key) + + + print "Generating %s (%s)" % (value['title'], file_path) + + csvOutput = open(os.path.join(self.config['output']['path'], key + '.csv'), 'w') + csvOutputWriter = csv.writer(csvOutput) + + csvOutputWriter.writerow(headers) + csvOutputWriter.writerows(rows) + + csvOutput.close() if __name__ == "__main__": if len(sys.argv) != 2: #FIXME: argparse please print "Usage: generate.py <folder with config.yaml and *.sql files>" - sys.exit(-1) + sys.exit(1) folder = sys.argv[1] - config = yaml.load(open(os.path.join(folder, "config.yaml"))) - graphs = dict([ - ( os.path.basename(filename).split(".")[0], - render(open(filename).read(), config) - ) for filename in glob.glob(os.path.join(folder, "*.sql")) - ]) - #url_fmt = 'http://stat1001.wikimedia.org/mobile-dashboard/%s' - for key, sql in graphs.items(): - print "Generating %s" % key - cursor = execute(sql) - rows = cursor.fetchall() - - csvOutput = open(os.path.join('/a/limn-public-data/mobile/datafiles', key + '.csv'), 'w') - csvOutputWriter = csv.writer(csvOutput) - - headers = [field[0] for field in cursor.description] - csvOutputWriter.writerow(headers) - csvOutputWriter.writerows(rows) - - csvOutput.close() + dg = DataGenerator(folder) + dg.execute() diff --git a/mobile/config.yaml b/mobile/config.yaml index c242a7d..fae1c4c 100644 --- a/mobile/config.yaml +++ b/mobile/config.yaml @@ -1,3 +1,14 @@ +databases: + el: + host: "s1-analytics-slave.eqiad.wmnet" + port: 3306 + creds_file: /a/.my.cnf.research + db: log + commons: + host: "s4-analytics-slave.eqiad.wmnet" + port: 3306 + creds_file: /a/.my.cnf.research + db: commonswiki tables: upload_attempts: (SELECT id, uuid, clientIp, isTruncated, clientValidated, timestamp, webHost, wiki, event_appversion, event_device, event_filename, 0 as event_multiple, event_platform, event_result, event_source, event_username FROM MobileAppUploadAttempts_5257716 UNION SELECT * FROM MobileAppUploadAttempts_5334329) as MobileAppUploadAttempts login_attempts: MobileAppLoginAttempts_5257721 @@ -5,6 +16,10 @@ upload_web: MobileWebUploads_5281063 intervals: running_average: 30 +output: + path: /a/limn-public-data/mobile/datafiles +defaults: + db: el graphs: unique-uploaders: title: Unique uploaders (over last 30 days) @@ -20,3 +35,5 @@ title: Successful Uploads (per day) share-attempts: title: Share Attempts (per day) + uploaders-experience: + title: Mobile Uploads vs Commons Edit count diff --git a/mobile/uploaders-experience.py b/mobile/uploaders-experience.py new file mode 100644 index 0000000..cd38076 --- /dev/null +++ b/mobile/uploaders-experience.py @@ -0,0 +1,67 @@ +mobile_uploaders_android_sql = u""" +SELECT event_username, count(*) AS uploads +FROM {{tables.upload_attempts}} +WHERE event_platform LIKE 'Android%' + AND event_result="success" + AND wiki="commonswiki" +GROUP BY event_username""" + +mobile_uploaders_ios_sql = u""" +SELECT event_username, count(*) AS uploads +FROM {{tables.upload_attempts}} +WHERE event_platform LIKE 'iOS%' + AND event_result="success" + AND wiki="commonswiki" +GROUP BY event_username""" + +mobile_uploaders_web_sql = u""" +SELECT event_username, count(*) AS uploads +FROM {{tables.upload_web}} +WHERE event_action="success" + AND wiki != "testwiki" +GROUP BY event_username""" + +edit_count_sql_template = u""" +SELECT user_name, user_editcount +FROM user +WHERE user_name in (%s) +""" + +headers = ["source", "uploads", "edits"] +def results_for(dg, sql, source): + uploads = {} + + commons = dg.conn_for('commons') + el = dg.conn_for('el') + + uploads_cursor = el.cursor() + uploads_cursor.execute(dg.render(sql)) + + for row in uploads_cursor: + username = row[0].strip() + username = username[0].upper() + username[1:] + # Default Edit Count to 0. Happens only if username is not found in commons + # Only case seems to be PhilK10, super early Commons iOS app tester who has + # An account on testwiki, but not on Commons + # The earliest version of the iOS app didn't differentiate between test and commons + uploads[username] = [source, row[1], 0] + + edit_count_sql = edit_count_sql_template % ('%s,' * len(uploads.keys()))[:-1] + edits_cursor = commons.cursor() + edits_cursor.execute(edit_count_sql, uploads.keys()) + + for row in edits_cursor: + username = row[0].strip().decode('utf8') + username = username[0].upper() + username[1:] + uploads[username][-1] = row[1] + return uploads.values() + +def execute(dg): + + results = [] + results.extend(results_for(dg, mobile_uploaders_ios_sql, "iOS")) + results.extend(results_for(dg, mobile_uploaders_android_sql, "Android")) + # Commenting out, because web doesn't track username. UGH + #results.extend(results_for(dg, mobile_uploaders_web_sql, "Web")) + + return (headers, results) -- To view, visit https://gerrit.wikimedia.org/r/57835 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: If5d3b2f53dac5071738b4fee2162346d64cae409 Gerrit-PatchSet: 1 Gerrit-Project: analytics/limn-mobile-data Gerrit-Branch: master Gerrit-Owner: Yuvipanda <yuvipa...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits