Hello Milimetric, I'd like you to do a code review. Please visit
https://gerrit.wikimedia.org/r/93326 to review the following change. Change subject: Add per project sums of editors ...................................................................... Add per project sums of editors Change-Id: I46949dd80ff346cad99cc5d1b59274a653a90187 --- M scripts/make_limn_files.py 1 file changed, 63 insertions(+), 10 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/geowiki refs/changes/26/93326/1 diff --git a/scripts/make_limn_files.py b/scripts/make_limn_files.py index df5f698..058eb19 100755 --- a/scripts/make_limn_files.py +++ b/scripts/make_limn_files.py @@ -77,6 +77,8 @@ def write_project_mysql(proj, cursor, basedir, country_graphs=False): + # This function's structure got copy/pasted to write_project_summed_mysql. + # Please apply fixes to this function also to write_project_summed_mysql. logger.debug('writing project datasource for: %s', proj) limn_id = proj + '_all' limn_name = '%s Editors by Country' % proj.upper() @@ -104,6 +106,55 @@ title = '%s Editors in %s' % (proj.upper(), country) graph_id = '%s_%s' % (proj, re.sub('\W+', ' ', country).strip().replace(' ', '_').lower()) source.write_graph(metric_ids=[country], basedir=basedir, title=title, graph_id=graph_id) + + +def write_project_summed_mysql(proj, cursor, basedir): + """Write out per project sums of editors + + Keyword arguments: + proj -- string. The name of the project in the database + (e.g.: 'en' for enwiki). + cursor -- database connection. Used to obtain the data. + basedir -- string. Path to the data repository to store the computed + data in. + """ + # dumb copy/paste of write_project_mysql to get daily per project active + # editor counts. Please apply fixes to this function also to + # write_project_mysql. + logger.debug('writing summed project datasource for: %s', proj) + limn_id = proj + 'wiki_editor_counts' + limn_name = proj + 'wiki editors (Tentative)' + + if sql.paramstyle == 'qmark': + query = """SELECT cohort, end, CONCAT(project, 'wiki') AS wikified_project, SUM(count) + FROM erosen_geocode_active_editors_country + WHERE project = ? AND end = start + INTERVAL 30 day + GROUP BY cohort, end, project""" + logger.debug('making query: %s', query) + elif sql.paramstyle == 'format': + query = """SELECT cohort, end, CONCAT(project, 'wiki') AS wikified_project, SUM(count) + FROM erosen_geocode_active_editors_country + WHERE project = %s AND end = start + INTERVAL 30 day + GROUP BY cohort, end, project""" + cursor.execute(query, [proj]) + proj_rows = cursor.fetchall() + + logger.debug('len(proj_rows): %d', len(proj_rows)) + if not proj_rows and sql.paramstyle == 'format': + logger.debug('No results for query: %s', query % proj) + return + limn_rows = make_limn_rows(proj_rows, 'wikified_project', 'SUM(count)') + source = limnpy.DataSource(limn_id, limn_name, limn_rows, limn_group=LIMN_GROUP) + source.write(basedir=basedir) + graph = source.get_graph(metric_ids = ['%swiki (5+)' % project]) + graph.graph['desc'] = """This graph currently mis-reports by counting each +editor once for each country associated to the IP addresses used by +the editor. +""" + drop_callout_widget(graph) + graph.write(basedir) + + def write_project_top_k_mysql(proj, cursor, basedir, k=10): @@ -327,7 +378,7 @@ return projects -def process_project_par((project, basedir)): +def process_project_par((project, basedir_private, basedir_public)): try: logger.info('processing project: %s', project) db = sql.connect(read_default_file=args.source_sql_cnf, db=args.source_db_name) @@ -335,18 +386,20 @@ # db = sql.connect('/home/erosen/src/editor-geocoding/geowiki.sqlite') - write_project_mysql(project, cursor, basedir) - write_project_top_k_mysql(project, cursor, basedir, k=args.k) - #write_project_country_language(project, cursor, basedir) + write_project_mysql(project, cursor, basedir_private) + write_project_top_k_mysql(project, cursor, basedir_private, k=args.k) + write_project_summed_mysql(project, cursor, basedir_public) + #write_project_country_language(project, cursor, basedir_private) except: logger.exception('caught exception in process:') raise -def process_project(project, cursor, basedir): +def process_project(project, cursor, basedir_private, basedir_public): logger.info('processing project: %s (%d/%d)', project, i, len(projects)) - write_project_mysql(project, cursor, basedir) - write_project_top_k_mysql(project, cursor, basedir, k=args.k) - #write_project_country_language(project, cursor, basedir) + write_project_mysql(project, cursor, basedir_private) + write_project_top_k_mysql(project, cursor, basedir_private, k=args.k) + write_project_summed_mysql(project, cursor, basedir_public) + #write_project_country_language(project, cursor, basedir_private) def plot_gs_editor_fraction(basedir): df = pd.read_csv(os.path.join(basedir, 'datafiles', 'global_south.csv'), index_col='date', parse_dates=['date']) @@ -446,10 +499,10 @@ if not args.parallel or sql.threadsafety < 2: for i, project in enumerate(projects): logger.info('processing project: %s (%d/%d)', project, i, len(projects)) - process_project(project, cursor, args.basedir_private) + process_project(project, cursor, args.basedir_private, args.basedir_public) else: pool = multiprocessing.Pool(20) - pool.map_async(process_project_par, itertools.izip(projects, itertools.repeat(args.basedir_private))).get(99999) + pool.map_async(process_project_par, itertools.izip(projects, itertools.repeat(args.basedir_private), itertools.repeat(args.basedir_public))).get(99999) write_overall_mysql(projects, cursor, args.basedir_private) plot_gs_editor_fraction(args.basedir_private) -- To view, visit https://gerrit.wikimedia.org/r/93326 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I46949dd80ff346cad99cc5d1b59274a653a90187 Gerrit-PatchSet: 1 Gerrit-Project: analytics/geowiki Gerrit-Branch: master Gerrit-Owner: QChris <christ...@quelltextlich.at> Gerrit-Reviewer: Milimetric <dandree...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits