Nuria has uploaded a new change for review. https://gerrit.wikimedia.org/r/110945
Change subject: [WIP] Create wiki_testing and wikimetric_testing databases so as not clobber dev database. ...................................................................... [WIP] Create wiki_testing and wikimetric_testing databases so as not clobber dev database. Change-Id: I448516e983185266f8c54cf102641cea6de2807a --- M scripts/test M tests/__init__.py M tests/fixtures.py M tests/test_controllers/test_cohorts.py M tests/test_core_classes.py M tests/test_metrics/test_revert_rate.py M tests/test_models/test_user.py M tests/test_models/test_validate_cohort.py M tests/test_utils/test_one_off_functions.py M wikimetrics/configurables.py M wikimetrics/controllers/cohorts.py M wikimetrics/database.py M wikimetrics/models/mediawiki/custom_columns.py M wikimetrics/models/mediawiki/logging.py M wikimetrics/models/mediawiki/revision.py M wikimetrics/models/mediawiki/user.py M wikimetrics/models/validate_cohort.py M wikimetrics/templates/forms/metric_configuration.html 18 files changed, 283 insertions(+), 122 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/wikimetrics refs/changes/45/110945/1 diff --git a/scripts/test b/scripts/test index fc20168..5c14275 100755 --- a/scripts/test +++ b/scripts/test @@ -1,4 +1,5 @@ # for example: # scripts/test "tests/test_controllers/test_cohorts.py:CohortsControllerTest" # rm .coverage *.db -find . -name *.pyc | xargs rm ; nosetests --cover-erase $1 +# nosetest -s : do not capture all stdout +find . -name *.pyc | xargs rm ; nosetests -s --cover-erase $1 diff --git a/tests/__init__.py b/tests/__init__.py index b239005..c56ac06 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -2,11 +2,76 @@ from os import devnull from signal import SIGINT from time import sleep - -from wikimetrics.configurables import app - +from wikimetrics.configurables import app, db +from wikimetrics.utils import format_date +# CREATE DB IF NOT EXISTS gives a (pretty useless) warning if DB exists +from warnings import filterwarnings +import MySQLdb as mysql +filterwarnings('ignore', category=mysql.Warning) celery_proc = None + + +def create_test_db(dbName, cursor): + sql = "CREATE DATABASE IF NOT EXISTS " + dbName + " ; " + cursor.execute(sql) + cursor.fetchone() + sql = "GRANT ALL on " + dbName + ".* TO wikimetrics@'localhost' " + cursor.execute(sql) + cursor.fetchone() + + +def compose_connection_string(user, password, host, dbName): + from urlparse import urlparse + # results in ParseResult(scheme='mysql', netloc='root:vagrant@localhost', + # path='/wiki', params='', query='', fragment='') + return "mysql://" + user + ":" + password + "@" + host + "/" + dbName + + +def setUpTestingDB(): + """ + Set global testing variables, create the databases we need for + testing and grant wikimetrics user + permits in all of them. + By convention testing dbs are development dbs with sufix "_testing" + Note that wikimetrics user already exists, puppet has created it. + """ + + # Set TESTING to true so we can know to not check CSRF + # TODO we need a global config object + app.config['TESTING'] = True + db.config['TESTING'] = True + + user = db.config["MEDIAWIKI"]["USER"] + password = db.config["MEDIAWIKI"]["PASSWORD"] + host = db.config["MEDIAWIKI"]["HOST"] + + # hardcode valid projecthostnames for testing + db.config["PROJECT_HOST_NAMES"] = ['wiki', 'dewiki'] + + # add testing suffix to db + db.config["WIKIMETRICS"]["DBNAME"] = db.config["WIKIMETRICS"]["DBNAME"] + '_testing' + db.config["MEDIAWIKI"]["DBNAME"] = db.config["MEDIAWIKI"]["DBNAME"] + '_testing' + dbNameMediawiki = db.config["MEDIAWIKI"]["DBNAME"] + dbNameWikimetrics = db.config["WIKIMETRICS"]["DBNAME"] + + # change db connection strings to connect to testing db + db.config["MEDIAWIKI_ENGINE_URL_TEMPLATE"] = compose_connection_string( + user, password, host, dbNameMediawiki) + db.config["WIKIMETRICS_ENGINE_URL"] = compose_connection_string( + db.config["WIKIMETRICS"]["USER"], + db.config["WIKIMETRICS"]["PASSWORD"], host, dbNameWikimetrics) + + _db = mysql.connect(host=host, user=user, passwd=password) + cursor = _db.cursor() + # create testing dbs + for dbName in (dbNameMediawiki, dbNameWikimetrics, 'dewiki_testing'): + create_test_db(dbName, cursor) + _db.close() + + +#Initializing testing databases before setup method +setUpTestingDB() def celery_is_alive(): @@ -23,9 +88,10 @@ def setUp(): - # Set TESTING to true so we can know to not check CSRF - app.config['TESTING'] = True - + """ + Set global testing variables and override database names s + so they have "_testing" as a suffix + """ celery_out = open(devnull, "w") celery_cmd = ['wikimetrics', '--mode', 'queue'] global celery_proc @@ -42,3 +108,10 @@ global celery_proc if celery_proc is not None: celery_proc.send_signal(SIGINT) + + +def i(date_object): + """ + helper function to convert dates into integers representing mediawiki timestamps + """ + return int(format_date(date_object)) diff --git a/tests/fixtures.py b/tests/fixtures.py index cd58aac..523a2c3 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -15,6 +15,7 @@ 'i', 'd', 'tz_note', + 'mediawikiProject' ] @@ -54,6 +55,8 @@ MediawikiUser, Logging, ) + +mediawikiProject = db.config['PROJECT_HOST_NAMES'][0] class DatabaseTest(unittest.TestCase): @@ -119,11 +122,11 @@ assign all pages to the same editor or an array to specify each one individually owner_user_id : record in the User table that owns this cohort - + page_touched : <needs to be filled in> - - user-email_token_expires: <needs to be filled in> - + + user_email_token_expires: <needs to be filled in> + Returns Nothing but creates the following, to be accessed in a test: self.cohort : owned by web_test_user, contains self.editors @@ -155,7 +158,7 @@ if type(user_registrations) is int: user_registrations = [user_registrations] * editor_count - self.project = 'wiki' + self.project = mediawikiProject self.cohort = Cohort( name='{0}-cohort'.format(name), @@ -167,7 +170,7 @@ self.session.commit() self.page = Page(page_namespace=0, page_title='{0}-page'.format(name), - page_touched=page_touched) + page_touched=page_touched) self.mwSession.add(self.page) self.mwSession.commit() @@ -404,11 +407,11 @@ #**************************************************************** # set up and clean database (Warning: this DESTROYS ALL DATA) #**************************************************************** - project = 'wiki' self.session = db.get_session() - engine = db.get_mw_engine(project) + engine = db.get_mw_engine(mediawikiProject) db.MediawikiBase.metadata.create_all(engine, checkfirst=True) - self.mwSession = db.get_mw_session(project) + # mediawikiProject is a global defined on this file + self.mwSession = db.get_mw_session(mediawikiProject) DatabaseTest.tearDown(self) def tearDown(self): @@ -494,15 +497,14 @@ class DatabaseWithSurvivorCohortTest(unittest.TestCase): - + def acquireDBHandles(self): - project = 'wiki' self.session = db.get_session() - engine = db.get_mw_engine(project) + engine = db.get_mw_engine(mediawikiProject) db.MediawikiBase.metadata.create_all(engine, checkfirst=True) - self.mwSession = db.get_mw_session(project) + self.mwSession = db.get_mw_session(mediawikiProject) self.survivors_namespace = 0 - + def clearWikimetrics(self): self.session.query(CohortWikiUser).delete() self.session.query(CohortUser).delete() @@ -512,7 +514,7 @@ self.session.query(PersistentReport).delete() self.session.commit() self.session.close() - + def clearMediawiki(self): self.mwSession.query(Logging).delete() self.mwSession.query(Revision).delete() @@ -520,7 +522,7 @@ self.mwSession.query(Page).delete() self.mwSession.commit() self.mwSession.close() - + def createUsers(self): mw_user_dan = MediawikiUser(user_name='Dan') mw_user_evan = MediawikiUser(user_name='Evan') @@ -529,29 +531,31 @@ self.mwSession.add_all([mw_user_dan, mw_user_evan, mw_user_andrew, mw_user_diederik]) self.mwSession.commit() - + wu_dan = WikiUser(mediawiki_username='Dan', valid=True, - mediawiki_userid=mw_user_dan.user_id, project='wiki') - wu_evan = WikiUser(mediawiki_username='Evan', valid=True, - mediawiki_userid=mw_user_evan.user_id, project='wiki') + mediawiki_userid=mw_user_dan.user_id, project=mediawikiProject) + wu_evan = WikiUser(mediawiki_username='Evan', + valid=True, mediawiki_userid=mw_user_evan.user_id, + project=mediawikiProject) wu_andrew = WikiUser(mediawiki_username='Andrew', valid=True, - mediawiki_userid=mw_user_andrew.user_id, project='wiki') + mediawiki_userid=mw_user_andrew.user_id, + project=mediawikiProject) wu_diederik = WikiUser(mediawiki_username='Diederik', valid=True, mediawiki_userid=mw_user_diederik.user_id, - project='wiki') + project=mediawikiProject) self.session.add_all([wu_dan, wu_evan, wu_andrew, wu_diederik]) self.session.commit() - + self.dan_id = wu_dan.id self.evan_id = wu_evan.id self.andrew_id = wu_andrew.id self.diederik_id = wu_diederik.id - + self.mw_dan_id = mw_user_dan.user_id self.mw_evan_id = mw_user_evan.user_id self.mw_andrew_id = mw_user_andrew.user_id self.mw_diederik_id = mw_user_diederik.user_id - + def createCohort(self): self.cohort = Cohort( name='demo-survivor-cohort', @@ -562,7 +566,7 @@ ) self.session.add(self.cohort) self.session.commit() - + ids = [self.dan_id, self.evan_id, self.andrew_id, self.diederik_id] for wiki_editor_id in ids: cohort_wiki_editor = CohortWikiUser( @@ -571,33 +575,33 @@ ) self.session.add(cohort_wiki_editor) self.session.commit() - + # update dan,evan,andrew,diederik user_registration timestamp def updateSurvivorRegistrationData(self): registration_date_dan = format_date(datetime(2013, 1, 1)) registration_date_evan = format_date(datetime(2013, 1, 2)) registration_date_andrew = format_date(datetime(2013, 1, 3)) - + self.mwSession.query(MediawikiUser) \ .filter(MediawikiUser.user_id == self.mw_dan_id) \ .update({"user_registration": registration_date_dan}) - + self.mwSession.query(MediawikiUser) \ .filter(MediawikiUser.user_id == self.mw_evan_id) \ .update({"user_registration": registration_date_evan}) - + self.mwSession.query(MediawikiUser) \ .filter(MediawikiUser.user_id == self.mw_andrew_id) \ .update({"user_registration": registration_date_andrew}) - + def createPageForSurvivors(self): self.page = Page(page_namespace=self.survivors_namespace, page_title='SurvivorTestPage') self.mwSession.add_all([self.page]) self.mwSession.commit() - + def createRevisionsForSurvivors(self): - + # create a revision for user with id uid at time t def createCustomRevision(uid, t): r = Revision( @@ -610,20 +614,20 @@ ) self.mwSession.add(r) self.mwSession.commit() - + createCustomRevision(self.mw_dan_id, datetime(2013, 1, 1)) createCustomRevision(self.mw_dan_id, datetime(2013, 1, 2)) createCustomRevision(self.mw_dan_id, datetime(2013, 1, 3)) - + createCustomRevision(self.mw_evan_id, datetime(2013, 1, 2)) createCustomRevision(self.mw_evan_id, datetime(2013, 1, 3)) createCustomRevision(self.mw_evan_id, datetime(2013, 1, 4)) - + createCustomRevision(self.mw_andrew_id, datetime(2013, 1, 3)) createCustomRevision(self.mw_andrew_id, datetime(2013, 1, 4)) createCustomRevision(self.mw_andrew_id, datetime(2013, 1, 5)) createCustomRevision(self.mw_andrew_id, datetime(2013, 1, 6)) - + def setUp(self): self.acquireDBHandles() self.clearWikimetrics() @@ -633,6 +637,6 @@ self.updateSurvivorRegistrationData() self.createPageForSurvivors() self.createRevisionsForSurvivors() - + def runTest(self): pass diff --git a/tests/test_controllers/test_cohorts.py b/tests/test_controllers/test_cohorts.py index d97ac8c..1454502 100644 --- a/tests/test_controllers/test_cohorts.py +++ b/tests/test_controllers/test_cohorts.py @@ -2,7 +2,7 @@ import time from StringIO import StringIO from nose.tools import assert_equal, assert_true, assert_false, raises, nottest - +from wikimetrics.configurables import app from tests.fixtures import WebTest from wikimetrics.models import ( Cohort, CohortUser, CohortUserRole, ValidateCohort, @@ -64,8 +64,7 @@ def test_detail_by_name_after_async_validate(self): self.helper_reset_validation() - - validate_cohort = ValidateCohort(self.cohort) + validate_cohort = ValidateCohort(self.cohort, app.config) async_result = validate_cohort.task.delay(validate_cohort) self.cohort.validation_queue_key = async_result.task_id async_result.get() diff --git a/tests/test_core_classes.py b/tests/test_core_classes.py index 72783bf..c03f578 100644 --- a/tests/test_core_classes.py +++ b/tests/test_core_classes.py @@ -1,23 +1,37 @@ import os from unittest import TestCase from nose.tools import assert_equals, assert_true -from wikimetrics.configurables import db +from wikimetrics.configurables import db, parse_db_connection_string from wikimetrics.database import get_host_projects, get_host_projects_map class DatabaseSetupTest(TestCase): - + """" + These tests access the 'live" project_host_map + thus they make an http connection to get it. + The rest of the tests should would offline as they do not access + the function get_host_projects directly. + """ def test_get_host_projects(self): (host_one, projects) = get_host_projects(1) assert_equals(host_one, 1) - assert_true('wiki' in projects) + assert_true('enwiki' in projects) def test_get_host_projects_map(self): project_host_map = get_host_projects_map() - assert_true('wiki' in project_host_map) - assert_true('arwiki' in project_host_map) - assert_true('commonswiki' in project_host_map) - + assert_true('enwiki' in project_host_map) + assert_true('dewiki' in project_host_map) + + def test_parse_db_connection_string(self): + url = "mysql://wikimetrics:wikimetrics@localhost/wikimetrics" + user, password, host, dbName = parse_db_connection_string(url) + assert_equals(user, 'wikimetrics') + assert_equals(password, 'wikimetrics') + assert_equals(host, 'localhost') + assert_equals(dbName, 'wikimetrics') + + + #def test_get_fresh_project_host_map(self): #project_host_map_cache_file = 'project_host_map.json' ## make sure any cached file is deleted diff --git a/tests/test_metrics/test_revert_rate.py b/tests/test_metrics/test_revert_rate.py index b0f22aa..6efec01 100644 --- a/tests/test_metrics/test_revert_rate.py +++ b/tests/test_metrics/test_revert_rate.py @@ -4,6 +4,7 @@ from wikimetrics.metrics import RevertRate, TimeseriesChoices from wikimetrics.models import Cohort, MetricReport + class RevertRateTest(DatabaseTest): def setUp(self): @@ -20,6 +21,7 @@ [2, 4, 5], # User B reverts user A's edit #3 back to edit #2. ], ) + @nottest def test_single_revert(self): metric = RevertRate( diff --git a/tests/test_models/test_user.py b/tests/test_models/test_user.py index c02104c..20806a0 100644 --- a/tests/test_models/test_user.py +++ b/tests/test_models/test_user.py @@ -31,5 +31,4 @@ user = self.session.query(User).get(self.owner_user_id) user.authenticated = False self.session.commit() - assert_equal(user.is_anonymous(), True) diff --git a/tests/test_models/test_validate_cohort.py b/tests/test_models/test_validate_cohort.py index 836dd95..3179109 100644 --- a/tests/test_models/test_validate_cohort.py +++ b/tests/test_models/test_validate_cohort.py @@ -1,22 +1,28 @@ import unittest from nose.tools import assert_equal, raises, assert_true, assert_false -from tests.fixtures import WebTest, QueueDatabaseTest +from wikimetrics.configurables import app, db +from tests.fixtures import WebTest, QueueDatabaseTest, mediawikiProject from wikimetrics.controllers.forms import CohortUpload from wikimetrics.models import ( MediawikiUser, Cohort, WikiUser, ValidateCohort, User, normalize_project, ) +# do not hardcode project name + +# add 'enwiki' as a valid projectname for these tests test +db.config['PROJECT_HOST_NAMES'].append('enwiki') + class ValidateCohortTest(WebTest): def test_normalize_project_shorthand(self): normal = normalize_project('en') - assert_equal(normal, 'wiki') + assert_equal(normal, 'enwiki') def test_normalize_project_uppercase(self): - normal = normalize_project('ENWIKI') - assert_equal(normal, 'wiki') + normal = normalize_project(mediawikiProject.upper()) + assert_equal(normal, mediawikiProject) def test_normalize_project_nonexistent(self): normal = normalize_project('blah') @@ -26,7 +32,7 @@ self.helper_reset_validation() self.cohort.validate_as_user_ids = False self.session.commit() - v = ValidateCohort(self.cohort) + v = ValidateCohort(self.cohort, app.config) v.validate_records(self.session, self.cohort) assert_equal(self.cohort.validated, True) @@ -44,7 +50,7 @@ wikiusers[0].project = 'blah' wikiusers[1].mediawiki_username = 'blah' self.session.commit() - v = ValidateCohort(self.cohort) + v = ValidateCohort(self.cohort, app.config) v.validate_records(self.session, self.cohort) assert_equal(self.cohort.validated, True) @@ -63,7 +69,7 @@ class ValidateCohortQueueTest(QueueDatabaseTest): - + def setUp(self): QueueDatabaseTest.setUp(self) @@ -77,23 +83,24 @@ self.owner_user_id = owner_user.id def test_small_cohort(self): + cohort_upload = CohortUpload() cohort_upload.name.data = 'small_cohort' - cohort_upload.project.data = 'wiki' + cohort_upload.project.data = mediawikiProject cohort_upload.records = [ # two existing users - {'username': 'Editor test-specific-0', 'project': 'wiki'}, - {'username': 'Editor test-specific-1', 'project': 'wiki'}, + {'username': 'Editor test-specific-0', 'project': mediawikiProject}, + {'username': 'Editor test-specific-1', 'project': mediawikiProject}, # one invalid username - {'username': 'Nonexisting', 'project': 'wiki'}, + {'username': 'Nonexisting', 'project': mediawikiProject}, # one user with invalid project {'username': 'Nonexisting2', 'project': 'Nonexisting'}, ] - - v = ValidateCohort.from_upload(cohort_upload, self.owner_user_id) + + v = ValidateCohort.from_upload(cohort_upload, self.owner_user_id, app.config) v.task.delay(v).get() self.session.commit() - + assert_equal(self.session.query(WikiUser).filter( WikiUser.mediawiki_username == 'Editor test-specific-0').one().valid, True) assert_equal(self.session.query(WikiUser).filter( @@ -109,7 +116,7 @@ cohort_upload.project.data = 'wiki' cohort_upload.records = [{'fake': 'dict'}] - v = ValidateCohort.from_upload(cohort_upload, self.owner_user_id) + v = ValidateCohort.from_upload(cohort_upload, self.owner_user_id, app.config) assert_equal(v, None) @@ -117,5 +124,5 @@ def test_repr(self): cohort = Cohort(id=1) - v = ValidateCohort(cohort) + v = ValidateCohort(cohort, app.config) assert_equal(str(v), '<ValidateCohort("1")>') diff --git a/tests/test_utils/test_one_off_functions.py b/tests/test_utils/test_one_off_functions.py index 959acc5..407d15c 100644 --- a/tests/test_utils/test_one_off_functions.py +++ b/tests/test_utils/test_one_off_functions.py @@ -75,7 +75,7 @@ assert_equal(project, 'en') def test_project_name_for_link_with_wiki(self): - project = project_name_for_link('wiki') + project = project_name_for_link('enwiki') assert_equal(project, 'en') def test_link_to_user_page(self): diff --git a/wikimetrics/configurables.py b/wikimetrics/configurables.py index 96ec252..d70b5ae 100644 --- a/wikimetrics/configurables.py +++ b/wikimetrics/configurables.py @@ -4,6 +4,23 @@ import subprocess +def parse_db_connection_string(urlConnectionString): + """ + From a url like: mysql://wikimetrics:wikimetrics@localhost/wikimetrics + exracts user, password, host, dbName + """ + from urlparse import urlparse + parsed = urlparse(urlConnectionString) + # results in + # ParseResult(scheme='mysql', netloc='root:vagrant@localhost', + # path='/wiki', params='', query='', fragment='') + netloc = parsed.netloc + user = netloc.split(":")[0] + password, host = netloc.split(":")[1].split("@") + dbName = parsed.path.split("/")[1] + return user, password, host, dbName + + # TODO: does not work in labs environment def create_object_from_config_file(path): dir, fname = os.path.split(path) @@ -147,6 +164,10 @@ # TODO: look into making a single config object that has empty sections if # some roles are not used (or maybe dependency injection) def config_db(args): + """ + Initializes the config object with what's passed in, further splits the config + to get a user,password, host and dbName + """ from .database import Database db_config = create_dict_from_text_config_file(args.db_config) @@ -155,6 +176,26 @@ db_config.__dict__.update(config_override) global db + user, password, host, dbName = parse_db_connection_string( + db_config["MEDIAWIKI_ENGINE_URL_TEMPLATE"]) + db_config["MEDIAWIKI"] = {} + db_config["MEDIAWIKI"]["USER"] = user + db_config["MEDIAWIKI"]["PASSWORD"] = password + db_config["MEDIAWIKI"]["HOST"] = host + db_config["MEDIAWIKI"]["DBNAME"] = dbName + + user, password, host, dbName = parse_db_connection_string( + db_config["WIKIMETRICS_ENGINE_URL"]) + db_config["WIKIMETRICS"] = {} + db_config["WIKIMETRICS"]["USER"] = user + db_config["WIKIMETRICS"]["PASSWORD"] = password + db_config["WIKIMETRICS"]["HOST"] = host + db_config["WIKIMETRICS"]["DBNAME"] = dbName + + # test setup will override this setting if needed + if db_config["DEBUG"]: + db_config["PROJECT_HOST_NAMES"] = [db_config["MEDIAWIKI"]["DBNAME"]] + db = Database(db_config) diff --git a/wikimetrics/controllers/cohorts.py b/wikimetrics/controllers/cohorts.py index 4addb40..914d8c9 100644 --- a/wikimetrics/controllers/cohorts.py +++ b/wikimetrics/controllers/cohorts.py @@ -163,7 +163,7 @@ flash('That Cohort name is already taken.', 'warning') else: form.parse_records() - vc = ValidateCohort.from_upload(form, current_user.id) + vc = ValidateCohort.from_upload(form, current_user.id, app.config) vc.task.delay(vc) return redirect('{0}#{1}'.format( url_for('cohorts_index'), @@ -175,7 +175,7 @@ return render_template( 'csv_upload.html', - projects=json.dumps(sorted(db.project_host_map.keys())), + projects=json.dumps(sorted(db.get_project_host_map().keys())), form=form, ) @@ -201,7 +201,7 @@ @app.route('/cohorts/validate/project') def validate_cohort_project_allowed(): project = request.args.get('project') - valid = project in db.project_host_map + valid = project in db.get_project_host_map() return json.dumps(valid) @@ -212,7 +212,8 @@ try: cohort = Cohort.get_safely(session, current_user.id, by_id=cohort_id) name = cohort.name - vc = ValidateCohort(cohort) + # TODO we need some kind of global config that is not db specific + vc = ValidateCohort(cohort, app.config) vc.task.delay(vc) return json_response(message='Validating cohort "{0}"'.format(name)) except Unauthorized: diff --git a/wikimetrics/database.py b/wikimetrics/database.py index 6733080..e9c5def 100644 --- a/wikimetrics/database.py +++ b/wikimetrics/database.py @@ -40,16 +40,15 @@ def get_host_projects_map(): + project_host_map = {} # TODO: these numbers are hardcoded, is that ok? num_hosts = 7 host_projects = map(get_host_projects, range(1, num_hosts + 1)) - project_host_map = {} host_fmt = 's{0}' for host_id, projects in host_projects: host = host_fmt.format(host_id) for project in projects: project_host_map[project] = host - return project_host_map @@ -62,22 +61,25 @@ def __init__(self, config): """ - Initializes the config object with what's passed in Initializes the declarative bases that are used throughout the project. Initializes the empty engines and sessionmakers that support `get_session` and `get_mw_session`. """ + self.config = config + self.WikimetricsBase = declarative_base(cls=SerializableBase) self.MediawikiBase = declarative_base(cls=SerializableBase) - + self.wikimetrics_engine = None self.wikimetrics_sessionmaker = None self.mediawiki_engines = {} self.mediawiki_sessionmakers = {} - self.project_host_map = self.get_project_host_map(usecache=True) - + + # we instantiate project_host_map lazily + self._project_host_map = None + def get_session(self): """ On the first run, instantiates the Wikimetrics session maker @@ -98,7 +100,7 @@ import wikimetrics.models self.WikimetricsBase.metadata.create_all( self.wikimetrics_engine, - checkfirst=True + checkfirst=True, ) self.wikimetrics_sessionmaker = sessionmaker(self.wikimetrics_engine) @@ -125,7 +127,7 @@ engine, checkfirst=True ) - + # Assuming that we're not using the real mediawiki databases in debug mode, # we have to create the tables #if self.config['DEBUG']: @@ -149,9 +151,6 @@ return self.mediawiki_engines[project] else: engine_template = self.config['MEDIAWIKI_ENGINE_URL_TEMPLATE'] - # This allows vagrant's wiki database to be used and configured separately - if self.config['DEBUG'] and project == 'wiki': - engine_template = self.config['DEV_ENGINE_URL_TEMPLATE'] engine = create_engine( engine_template.format(project), @@ -164,30 +163,44 @@ def get_project_host_map(self, usecache=True): """ Retrieves the list of mediawiki projects from noc.wikimedia.org. + If we are on development or testing project_host_map + does not access the network to verify project names. + Project names are hardcoded. + + Note that the project_host_map_list is fetched + not at the time we construct the object + but the first time we request it Parameters: usecache : defaults to True and uses a local cache if available + """ - cache_name = 'project_host_map.json' - if not exists(cache_name) or not usecache: + if self._project_host_map == None or usecache == False: + project_host_map = {} - project_host_map = get_host_projects_map() - if usecache and os.access(cache_name, os.W_OK): - try: - json.dump(project_host_map, open(cache_name, 'w')) - except: - print('No rights to write project host map cache {0}'.format( - os.path.abspath(cache_name) - )) - elif os.access(cache_name, os.R_OK): - project_host_map = json.load(open(cache_name)) - else: - raise Exception('Project host map could not be fetched or read') - - if self.config['DEBUG']: - project_host_map['wiki'] = 'mediawiki-vagrant' - - return project_host_map + if self.config.get("DEBUG"): + # tests init.py overrides this setting if needed + for p in self.config.get("PROJECT_HOST_NAMES"): + project_host_map[p] = 'localhost' + else: + # TODO Two processes could come here at the same time. LOCK! + cache_name = 'project_host_map.json' + if not exists(cache_name) or not usecache: + project_host_map = get_host_projects_map() + if usecache and os.access(cache_name, os.W_OK): + try: + json.dump(project_host_map, open(cache_name, 'w')) + except: + print('No rights to write project host map cache {0}'.format( + os.path.abspath(cache_name) + )) + elif os.access(cache_name, os.R_OK): + project_host_map = json.load(open(cache_name)) + else: + raise Exception('Project host map could not be fetched or read') + + self._project_host_map = project_host_map + return self._project_host_map @event.listens_for(Pool, "checkout") @@ -204,7 +217,7 @@ # optional - dispose the whole pool # instead of invalidating one at a time # connection_proxy._pool.dispose() - + # raise DisconnectionError - pool will try # connecting again up to three times before raising. raise exc.DisconnectionError() diff --git a/wikimetrics/models/mediawiki/custom_columns.py b/wikimetrics/models/mediawiki/custom_columns.py index 35adae9..d9aeece 100644 --- a/wikimetrics/models/mediawiki/custom_columns.py +++ b/wikimetrics/models/mediawiki/custom_columns.py @@ -32,6 +32,6 @@ To unbundle we have to detect unicodestrings that are set as mysql defaults they are not represented by singleton None """ - if not value or value == u'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00': + if not value or value == u"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00": return None return parse_date(value) diff --git a/wikimetrics/models/mediawiki/logging.py b/wikimetrics/models/mediawiki/logging.py index 27f20e1..d8de79f 100644 --- a/wikimetrics/models/mediawiki/logging.py +++ b/wikimetrics/models/mediawiki/logging.py @@ -12,12 +12,12 @@ log_id = Column(Integer, primary_key=True) log_type = Column(String(32), nullable=False, default='') log_action = Column(String(32), nullable=False, default='') - log_timestamp = Column(MediawikiTimestamp,nullable=False, default=u'19700101000000') - log_user = Column(Integer, ForeignKey('user.user_id'), nullable=False,default=0) + log_timestamp = Column(MediawikiTimestamp, nullable=False, default=u'19700101000000') + log_user = Column(Integer, ForeignKey('user.user_id'), nullable=False, default=0) log_namespace = Column(Integer, nullable=False, default=0) log_title = Column(String(255), nullable=False, default='') log_comment = Column(String(255), nullable=False, default='') - log_params= Column(BLOB, nullable=False,default='') + log_params = Column(BLOB, nullable=False, default='') log_deleted = Column(Boolean, nullable=False, default=0) log_user_text = Column(String(255), nullable=False, default='') - log_page = Column(Integer, ForeignKey('page.page_id')) \ No newline at end of file + log_page = Column(Integer, ForeignKey('page.page_id')) diff --git a/wikimetrics/models/mediawiki/revision.py b/wikimetrics/models/mediawiki/revision.py index c9e4046..9956b85 100644 --- a/wikimetrics/models/mediawiki/revision.py +++ b/wikimetrics/models/mediawiki/revision.py @@ -15,7 +15,7 @@ rev_user = Column(Integer, ForeignKey('user.user_id'), nullable=False, default=0) rev_user_text = Column(String(255), nullable=False, default='') rev_timestamp = Column(MediawikiTimestamp, nullable=False, - default=u'\0\0\0\0\0\0\0\0\0\0\0\0\0\0') + default=u'\0\0\0\0\0\0\0\0\0\0\0\0\0\0') rev_minor_edit = Column(Integer, nullable=False, default='0') # this might be a boolean but it gets overflown if set that way rev_deleted = Column(Integer) diff --git a/wikimetrics/models/mediawiki/user.py b/wikimetrics/models/mediawiki/user.py index 177ca0f..ecb6bd4 100644 --- a/wikimetrics/models/mediawiki/user.py +++ b/wikimetrics/models/mediawiki/user.py @@ -18,9 +18,9 @@ user_newpass_time = Column(MediawikiTimestamp) user_email = Column(String(255), nullable=False, default='') user_touched = Column(MediawikiTimestamp, nullable=False, - default=u'\0\0\0\0\0\0\0\0\0\0\0\0\0\0') + default=u'\0\0\0\0\0\0\0\0\0\0\0\0\0\0') user_token = Column(String(255), nullable=False, - default=u'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0') + default=u'''\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0''') user_email_authenticated = Column(MediawikiTimestamp) user_email_token = Column(String(255)) user_email_token_expires = Column(MediawikiTimestamp) diff --git a/wikimetrics/models/validate_cohort.py b/wikimetrics/models/validate_cohort.py index 6b24fca..45fe040 100644 --- a/wikimetrics/models/validate_cohort.py +++ b/wikimetrics/models/validate_cohort.py @@ -32,10 +32,12 @@ """ task = async_validate - def __init__(self, cohort): + def __init__(self, cohort, config): """ Parameters: cohort : an existing cohort + config : global config, we need to know + if we are on dev or testing to validate project name Instantiates with these properties: cohort_id : id of an existing cohort with validated == False @@ -44,9 +46,10 @@ """ self.cohort_id = cohort.id self.validate_as_user_ids = cohort.validate_as_user_ids + self.config = config @classmethod - def from_upload(cls, cohort_upload, owner_user_id): + def from_upload(cls, cohort_upload, owner_user_id, config): """ Create a new cohort and validate a list of uploaded users for it @@ -91,7 +94,7 @@ ] ) session.commit() - return cls(cohort) + return cls(cohort, config) except Exception, e: app.logger.error(str(e)) return None @@ -122,6 +125,7 @@ session : an active wikimetrics db session to use cohort : the cohort to validate; must belong to session """ + # reset the cohort validation status so it can't be used for reports cohort.validated = False session.execute( @@ -203,14 +207,17 @@ def normalize_project(project): """ Decides whether the name of the project is a valid one + There are differences in db names in local setup versus vagrant setup + While local setup uses enwiki mediawiki vagrant uses wiki + We let 'wiki' be an acceptable name in development. """ project = project.strip().lower() - if project in db.project_host_map: + if project in db.get_project_host_map(): return project else: # try adding wiki to end new_proj = project + 'wiki' - if new_proj not in db.project_host_map: + if new_proj not in db.get_project_host_map(): return None else: return new_proj diff --git a/wikimetrics/templates/forms/metric_configuration.html b/wikimetrics/templates/forms/metric_configuration.html index 0c4da93..34489a0 100644 --- a/wikimetrics/templates/forms/metric_configuration.html +++ b/wikimetrics/templates/forms/metric_configuration.html @@ -31,7 +31,7 @@ <li>Examples: <a target="_blank" href="https://de.wikipedia.org/wiki/Wikipedia:Namespace">dewiki</a>, <a target="_blank" href="https://commons.wikimedia.org/wiki/Help:Namespaces">commons</a>, - <a target="_blank" href="https://en.wikipedia.org/wiki/Wikipedia:Namespace">wiki</a></li> + <a target="_blank" href="https://en.wikipedia.org/wiki/Wikipedia:Namespace">enwiki</a></li> </ul> </div> {% endif %} -- To view, visit https://gerrit.wikimedia.org/r/110945 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I448516e983185266f8c54cf102641cea6de2807a Gerrit-PatchSet: 1 Gerrit-Project: analytics/wikimetrics Gerrit-Branch: master Gerrit-Owner: Nuria <nu...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits