http://www.mediawiki.org/wiki/Special:Code/MediaWiki/94945
# Revision: 94945
# Author: giovanni
# Date: 2011-08-18 20:58:40 +0000 (Thu, 18 Aug 2011)
# Log Message: added ginichart.py
# Added Paths: trunk/tools/wsor/contribution_inequality/ginichart.py

#!/usr/bin/python
''' computes gini coefficient of contribution to namespace per year '''

import os
import sys
import csv

import numpy as np

from itertools import groupby
from contextlib import closing
from argparse import ArgumentParser

# command line: a tab-separated data file and an optional chart title
parser = ArgumentParser(description=__doc__)
parser.add_argument('data_path', metavar='data')
parser.add_argument('-T', '--title')

# line properties cycled through when plotting one line per namespace
colors = 'bgrcmykw'
styles = ['-', '--', '-.', ':']
markers = 'ov^<>1234'


def gini(x):
    '''
    Computes an estimator of the Gini coefficient from an array x

    The sample Gini coefficient is rescaled by n / (n - 1), so the result
    agrees with igini() on the same observations.

    Parameters
    ----------
    x - a flat array of observations, in any order; it is left unmodified

    Returns
    -------
    the estimated Gini coefficient

    Raises
    ------
    ZeroDivisionError - if x holds exactly one observation

    References
    ----------
    http://mathworld.wolfram.com/GiniCoefficient.html
    '''
    # np.sort returns a sorted copy (non-decreasing order), so the caller's
    # array keeps its original order
    x = np.sort(np.asarray(x, dtype=float))
    n = float(len(x))
    i = np.arange(len(x)) + 1
    m = np.mean(x)
    return np.sum((2 * i - n - 1) * x) / (n ** 2 * m) * (n / (n - 1))


def igini(flatiter):
    '''
    Computes an estimator of the Gini coefficient from a sorted iterator on a
    flat sample of observations

    The observations are consumed in a single pass, so the sample never has
    to be held in memory.

    Parameters
    ----------
    flatiter - an iterator over observations, sorted in non-decreasing order

    Returns
    -------
    the estimated Gini coefficient, 1 - 2 / (n - 1) * (n - sum(i * y_i) /
    sum(y_i)), which is the same quantity gini() computes

    Raises
    ------
    ZeroDivisionError - (for plain Python numbers) if the iterator is empty,
    yields a single observation, or the observations sum to zero

    References
    ----------
    http://en.wikipedia.org/wiki/Gini_coefficient
    http://mathworld.wolfram.com/GiniCoefficient.html
    '''
    den = 0.0
    num = 0.0
    n = 0  # stays 0 for an empty iterator, which then divides 0.0 by 0.0
    for n, y in enumerate(flatiter, 1):
        num += n * y
        den += y
    # this expression already carries the n / (n - 1) correction; multiplying
    # by it again would only be harmless under integer division with n > 2
    return 1 - (2.0 / (n - 1)) * (n - num / den)


def main():
    '''
    Reads the data file, computes one Gini coefficient per (namespace, year)
    group, plots one line per namespace and saves the chart as a PDF
    '''
    # imported here so that gini/igini can be imported and used without
    # matplotlib being available
    import matplotlib.pyplot as pp
    from matplotlib.font_manager import FontProperties

    ns = parser.parse_args()

    ginis = []  # (namespace, year, gini) triples, in file order

    with closing(open(ns.data_path)) as f:
        reader = csv.DictReader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
        # a tuple key compares by value on both Python 2 and Python 3
        groupfunc = lambda row: (int(row['namespace']), int(row['year']))
        # groupby only merges adjacent rows, so the rows of one
        # (namespace, year) pair must be contiguous in the file.
        # NOTE(review): igini expects each group's total_contributions in
        # non-decreasing order -- presumably the file is pre-sorted; confirm.
        for key, subiter in groupby(reader, groupfunc):
            flatiter = (float(row['total_contributions']) for row in subiter)
            try:
                coeff = igini(flatiter)
            except ZeroDivisionError:
                # a single observation or an all-zero group has no estimate
                coeff = np.nan
            ginis.append(key + (coeff,))

    figure = pp.figure(figsize=(8, 4))
    # NOTE(review): newer matplotlib releases appear to spell `axisbg` as
    # `facecolor` -- confirm against the installed version.
    ax = figure.add_axes(pp.axes([.1, .1, .8, .8], axisbg='whitesmoke'))
    M = len(markers)
    C = len(colors)
    S = len(styles)

    # one line per namespace: x is the year, y is the Gini coefficient
    for i, (key, subiter) in enumerate(groupby(ginis, lambda k: k[0])):
        data = np.asarray([(year, coeff) for _, year, coeff in subiter])
        label = 'NS %d' % key
        ax.plot(data.T[0], data.T[1], label=label, marker=markers[i % M],
                color=colors[i % C], linestyle=styles[i % S])

    pp.ylabel('Gini coefficient')
    pp.legend(loc='best', prop=FontProperties(size='small'))
    pp.ylim(0, 1)
    pp.draw()
    if ns.title:
        pp.title(ns.title)
        figure_path = 'gini_' + ns.title.replace(' ', '_') + '.pdf'
    else:
        # use the file name only, so the 'gini_' prefix is not glued onto a
        # leading directory component of data_path
        stem = os.path.splitext(os.path.basename(ns.data_path))[0]
        figure_path = 'gini_' + stem + '.pdf'
    pp.savefig(figure_path, format='pdf')
    print('output saved to %s' % figure_path)
    pp.show()


if __name__ == '__main__':
    main()

# Property changes on: trunk/tools/wsor/contribution_inequality/ginichart.py
# Added: svn:executable + *
#
# MediaWiki-CVS mailing list
# MediaWiki-CVS@lists.wikimedia.org
# https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs