[MediaWiki-commits] [Gerrit] Add support for executing Python code along with SQL for csv... - change (analytics/limn-mobile-data)

2013-04-06 Thread Yuvipanda (Code Review)
Yuvipanda has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/57835


Change subject: Add support for executing Python code along with SQL for csv gen
..

Add support for executing Python code along with SQL for csv gen

Contains a general refactor to make things slightly objecty.
Everything is now picked up from the conf file, nothing hard coded

Also has a python script that provides a graph of user's mobile
upload count and general edit count. It needs to be a python
script instead of just SQL because it pulls in data from two
different databases on two different hosts.

Change-Id: If5d3b2f53dac5071738b4fee2162346d64cae409
---
M generate.py
M mobile/config.yaml
A mobile/uploaders-experience.py
3 files changed, 159 insertions(+), 38 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/limn-mobile-data 
refs/changes/35/57835/1

diff --git a/generate.py b/generate.py
index 020c0bd..8536272 100644
--- a/generate.py
+++ b/generate.py
@@ -1,51 +1,88 @@
 import os
 import sys
-import glob
+import csv
+import imp
+
+import yaml
 import MySQLdb as mysql
 from jinja2 import Template
-import yaml
-import csv
 
-conn = mysql.connect(
-host=os.environ.get("STAT_HOST", "s1-analytics-slave.eqiad.wmnet"),
-port=int(os.environ.get("STAT_PORT", 3306)),
-read_default_file='/a/.my.cnf.research',
-db="log"
-)
-#conn = mysql.connect("s1-analytics-slave.eqiad.wmnet", 
read_default_file=os.path.expanduser('~/.my.cnf.research'), db="log")
 
-def execute(sql):
-cur = conn.cursor()
-cur.execute(sql)
-return cur
+class DataGenerator():
+def __init__(self, folder_path):
+self.folder_path = folder_path
+self.config = yaml.load(open(os.path.join(self.folder_path, 
"config.yaml")))
+self.connections = {}
 
-def render(template, env):
-t = Template(template)
-return t.render(**env)
+def conn_for(self, name):
+if name not in self.config['databases']:
+raise ValueError("No such database %s" % name)
+
+if name not in self.connections:
+db = self.config['databases'][name]
+self.connections[name] = mysql.connect(
+host=db['host'],
+port=db['port'],
+read_default_file=db['creds_file'],
+db=db['db'],
+charset='utf8',
+use_unicode=True
+)
+
+return self.connections[name]
+
+def render(self, template):
+t = Template(template)
+return t.render(**self.config)
+
+def execute_sql(self, file_name, db_name):
+sql = self.render(open(file_name).read())
+conn = self.conn_for(db_name)
+cursor = conn.cursor()
+cursor.execute(sql)
+rows = cursor.fetchall()
+
+headers = [field[0] for field in cursor.description]
+
+return (headers, rows)
+
+def execute_python(self, name, file_path):
+module = imp.load_source(name, file_path)
+return module.execute(self)
+
+def execute(self):
+
+for key, value in self.config['graphs'].iteritems():
+# Look for the sql first, then python
+db_name = value.get('db', self.config['defaults']['db'])
+headers = None
+rows = None
+file_path = None
+if os.path.exists(os.path.join(folder, key + '.sql')):
+file_path = os.path.join(folder, key + '.sql')
+headers, rows = self.execute_sql(file_path, db_name)
+elif os.path.exists(os.path.join(folder, key + '.py')):
+file_path = os.path.join(folder, key + '.py')
+headers, rows = self.execute_python(key, file_path)
+else:
+raise ValueError("Can not find SQL or Python for %s" % key)
+
+
+print "Generating %s (%s)" % (value['title'], file_path)
+
+csvOutput = open(os.path.join(self.config['output']['path'], key + 
'.csv'), 'w')
+csvOutputWriter = csv.writer(csvOutput)
+
+csvOutputWriter.writerow(headers)
+csvOutputWriter.writerows(rows)
+
+csvOutput.close()
 
 if __name__ == "__main__":
 if len(sys.argv) != 2: #FIXME: argparse please
 print "Usage: generate.py "
-sys.exit(-1)
+sys.exit(1)
 
 folder = sys.argv[1]
-config = yaml.load(open(os.path.join(folder, "config.yaml")))
-graphs = dict([
-(   os.path.basename(filename).split(".")[0], 
-render(open(filename).read(), config)
-) for filename in glob.glob(os.path.join(folder, "*.sql"))
-])
-#url_fmt = 'http://stat1001.wikimedia.org/mobile-dashboard/%s'
-for key, sql in graphs.items():
-print "Generating %s" % key
-cursor = execute(sql)
-rows = cursor.fetchall()
-
-csvOutput = open(os.path.join('/a/limn-public-

[MediaWiki-commits] [Gerrit] Add support for executing Python code along with SQL for csv... - change (analytics/limn-mobile-data)

2013-04-06 Thread Yuvipanda (Code Review)
Yuvipanda has submitted this change and it was merged.

Change subject: Add support for executing Python code along with SQL for csv gen
..


Add support for executing Python code along with SQL for csv gen

Contains a general refactor to make things slightly objecty.
Everything is now picked up from the conf file, nothing hard coded

Also has a python script that provides a graph of user's mobile
upload count and general edit count. It needs to be a python
script instead of just SQL because it pulls in data from two
different databases on two different hosts.

Change-Id: If5d3b2f53dac5071738b4fee2162346d64cae409
---
M generate.py
M mobile/config.yaml
A mobile/uploaders-experience.py
3 files changed, 186 insertions(+), 40 deletions(-)

Approvals:
  Ori.livneh: Looks good to me, approved
  Yuvipanda: Verified



diff --git a/generate.py b/generate.py
index 020c0bd..7aeb743 100644
--- a/generate.py
+++ b/generate.py
@@ -1,51 +1,113 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import csv
+import imp
+import io
 import os
 import sys
-import glob
-import MySQLdb as mysql
-from jinja2 import Template
+
+import MySQLdb
+import jinja2
 import yaml
-import csv
 
-conn = mysql.connect(
-host=os.environ.get("STAT_HOST", "s1-analytics-slave.eqiad.wmnet"),
-port=int(os.environ.get("STAT_PORT", 3306)),
-read_default_file='/a/.my.cnf.research',
-db="log"
-)
-#conn = mysql.connect("s1-analytics-slave.eqiad.wmnet", 
read_default_file=os.path.expanduser('~/.my.cnf.research'), db="log")
 
-def execute(sql):
-cur = conn.cursor()
-cur.execute(sql)
-return cur
+class DataGenerator(object):
+"""Executes queries and generates CSV reports based on YAML configs."""
 
-def render(template, env):
-t = Template(template)
-return t.render(**env)
+def __init__(self, folder_path):
+"""Reads configuration 'config.yaml' in `folder_path`."""
+self.folder_path = folder_path
+config_file_path = os.path.join(folder_path, 'config.yaml')
+with io.open(config_file_path, encoding='utf-8') as config_file:
+self.config = yaml.load(config_file)
+self.connections = {}
+
+def make_connection(self, name):
+"""Opens a connection to a database using parameters specified in YAML
+file for a given name."""
+try:
+db = self.config['databases'][name]
+except KeyError:
+raise ValueError('No such database: "%s"' % name)
+
+self.connections[name] = MySQLdb.connect(
+host=db['host'],
+port=db['port'],
+read_default_file=db['creds_file'],
+db=db['db'],
+charset='utf8',
+use_unicode=True
+)
+
+def get_connection(self, name):
+"""Gets a database connection, specified by its name in the
+configuration files. If no connection exists, makes it."""
+if name not in self.connections:
+self.make_connection(name)
+return self.connections[name]
+
+def render(self, template):
+"""Constructs a SQL query string by interpolating values into a jinja
+template."""
+t = jinja2.Template(template)
+return t.render(**self.config)
+
+def execute_sql(self, file_name, db_name):
+"""Reads a query from `file_name`, renders it, and executes it against
+a database, specified by configuration key.
+
+Returns a tuple of (headers, rows).
+"""
+with io.open(file_name, encoding='utf-8') as f:
+sql = self.render(f.read())
+conn = self.get_connection(db_name)
+cursor = conn.cursor()
+try:
+cursor.execute(sql)
+rows = cursor.fetchall()
+headers = [field[0] for field in cursor.description]
+finally:
+cursor.close()
+return (headers, rows)
+
+def execute_python(self, name, file_path):
+"""Does unspeakable evil. Look away!"""
+module = imp.load_source(name, file_path)
+return module.execute(self)
+
+def execute(self):
+"""Generates a CSV report by executing Python code and SQL queries."""
+global folder
+
+for key, value in self.config['graphs'].iteritems():
+# Look for the sql first, then python
+db_name = value.get('db', self.config['defaults']['db'])
+
+if os.path.exists(os.path.join(folder, key + '.sql')):
+file_path = os.path.join(folder, key + '.sql')
+headers, rows = self.execute_sql(file_path, db_name)
+elif os.path.exists(os.path.join(folder, key + '.py')):
+file_path = os.path.join(folder, key + '.py')
+headers, rows = self.execute_python(key, file_path)
+else:
+raise ValueError("Can not find SQL or Python for %s" % key)
+
+print "Generating %s (%s)"