Nuria has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/110945

Change subject: [WIP] Create wiki_testing and wikimetrics_testing databases so 
as not to clobber dev database.
......................................................................

[WIP] Create wiki_testing and wikimetrics_testing databases so as not to clobber 
dev database.

Change-Id: I448516e983185266f8c54cf102641cea6de2807a
---
M scripts/test
M tests/__init__.py
M tests/fixtures.py
M tests/test_controllers/test_cohorts.py
M tests/test_core_classes.py
M tests/test_metrics/test_revert_rate.py
M tests/test_models/test_user.py
M tests/test_models/test_validate_cohort.py
M tests/test_utils/test_one_off_functions.py
M wikimetrics/configurables.py
M wikimetrics/controllers/cohorts.py
M wikimetrics/database.py
M wikimetrics/models/mediawiki/custom_columns.py
M wikimetrics/models/mediawiki/logging.py
M wikimetrics/models/mediawiki/revision.py
M wikimetrics/models/mediawiki/user.py
M wikimetrics/models/validate_cohort.py
M wikimetrics/templates/forms/metric_configuration.html
18 files changed, 283 insertions(+), 122 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/wikimetrics 
refs/changes/45/110945/1

diff --git a/scripts/test b/scripts/test
index fc20168..5c14275 100755
--- a/scripts/test
+++ b/scripts/test
@@ -1,4 +1,5 @@
 # for example:
 # scripts/test "tests/test_controllers/test_cohorts.py:CohortsControllerTest"
 # rm .coverage *.db
-find . -name *.pyc | xargs rm ; nosetests --cover-erase $1
+# nosetests -s : do not capture stdout
+find . -name *.pyc | xargs rm ; nosetests -s  --cover-erase $1
diff --git a/tests/__init__.py b/tests/__init__.py
index b239005..c56ac06 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -2,11 +2,76 @@
 from os import devnull
 from signal import SIGINT
 from time import sleep
-
-from wikimetrics.configurables import app
-
+from wikimetrics.configurables import app, db
+from wikimetrics.utils import format_date
+# CREATE DATABASE IF NOT EXISTS gives a (pretty useless) warning if DB exists
+from warnings import filterwarnings
+import MySQLdb as mysql
+filterwarnings('ignore', category=mysql.Warning)
 
 celery_proc = None
+
+
+def create_test_db(dbName, cursor):
+    sql = "CREATE DATABASE IF NOT EXISTS " + dbName + " ; "
+    cursor.execute(sql)
+    cursor.fetchone()
+    sql = "GRANT ALL on " + dbName + ".* TO wikimetrics@'localhost' "
+    cursor.execute(sql)
+    cursor.fetchone()
+
+
+def compose_connection_string(user, password, host, dbName):
+    from urlparse import urlparse
+    # builds a url like mysql://root:vagrant@localhost/wiki
+    # (the inverse of configurables.parse_db_connection_string)
+    return "mysql://" + user + ":" + password + "@" + host + "/" + dbName
+
+
+def setUpTestingDB():
+    """
+        Set global testing variables, create the databases we need for
+        testing and grant the wikimetrics user
+        all privileges on each of them.
+        By convention testing dbs are development dbs with suffix "_testing"
+        Note that wikimetrics user already exists, puppet has created it.
+    """
+    
+    # Set TESTING to true so we can know to not check CSRF
+    # TODO we need a global config object
+    app.config['TESTING'] = True
+    db.config['TESTING'] = True
+    
+    user = db.config["MEDIAWIKI"]["USER"]
+    password = db.config["MEDIAWIKI"]["PASSWORD"]
+    host = db.config["MEDIAWIKI"]["HOST"]
+    
+    # hardcode valid project host names for testing
+    db.config["PROJECT_HOST_NAMES"] = ['wiki', 'dewiki']
+    
+    # add testing suffix to db
+    db.config["WIKIMETRICS"]["DBNAME"] = db.config["WIKIMETRICS"]["DBNAME"] + 
'_testing'
+    db.config["MEDIAWIKI"]["DBNAME"] = db.config["MEDIAWIKI"]["DBNAME"] + 
'_testing'
+    dbNameMediawiki = db.config["MEDIAWIKI"]["DBNAME"]
+    dbNameWikimetrics = db.config["WIKIMETRICS"]["DBNAME"]
+    
+    # change db connection strings to connect to testing db
+    db.config["MEDIAWIKI_ENGINE_URL_TEMPLATE"] = compose_connection_string(
+        user, password, host, dbNameMediawiki)
+    db.config["WIKIMETRICS_ENGINE_URL"] = compose_connection_string(
+        db.config["WIKIMETRICS"]["USER"],
+        db.config["WIKIMETRICS"]["PASSWORD"], host, dbNameWikimetrics)
+    
+    _db = mysql.connect(host=host, user=user, passwd=password)
+    cursor = _db.cursor()
+    # create testing dbs
+    for dbName in (dbNameMediawiki, dbNameWikimetrics, 'dewiki_testing'):
+        create_test_db(dbName, cursor)
+    _db.close()
+
+
+# Initialize testing databases at import time, before the setUp method runs
+setUpTestingDB()
 
 
 def celery_is_alive():
@@ -23,9 +88,10 @@
 
 
 def setUp():
-    # Set TESTING to true so we can know to not check CSRF
-    app.config['TESTING'] = True
-    
+    """
+    Launch the wikimetrics queue (celery) process used by the tests; testing
+    variables and "_testing" db names are set earlier by setUpTestingDB
+    """
     celery_out = open(devnull, "w")
     celery_cmd = ['wikimetrics', '--mode', 'queue']
     global celery_proc
@@ -42,3 +108,10 @@
     global celery_proc
     if celery_proc is not None:
         celery_proc.send_signal(SIGINT)
+
+
+def i(date_object):
+    """
+    helper function to convert dates into integers representing mediawiki 
timestamps
+    """
+    return int(format_date(date_object))
diff --git a/tests/fixtures.py b/tests/fixtures.py
index cd58aac..523a2c3 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -15,6 +15,7 @@
     'i',
     'd',
     'tz_note',
+    'mediawikiProject'
 ]
 
 
@@ -54,6 +55,8 @@
     MediawikiUser,
     Logging,
 )
+
+mediawikiProject = db.config['PROJECT_HOST_NAMES'][0]
 
 
 class DatabaseTest(unittest.TestCase):
@@ -119,11 +122,11 @@
                                         assign all pages to the same editor or 
an
                                         array to specify each one individually
             owner_user_id           : record in the User table that owns this 
cohort
-
+            
             page_touched            : <needs to be filled in>
-
-            user-email_token_expires: <needs to be filled in>
-
+            
+            user_email_token_expires: <needs to be filled in>
+        
         Returns
             Nothing but creates the following, to be accessed in a test:
               self.cohort       : owned by web_test_user, contains self.editors
@@ -155,7 +158,7 @@
         if type(user_registrations) is int:
             user_registrations = [user_registrations] * editor_count
         
-        self.project = 'wiki'
+        self.project = mediawikiProject
         
         self.cohort = Cohort(
             name='{0}-cohort'.format(name),
@@ -167,7 +170,7 @@
         self.session.commit()
         
         self.page = Page(page_namespace=0, page_title='{0}-page'.format(name),
-                    page_touched=page_touched)
+                         page_touched=page_touched)
         self.mwSession.add(self.page)
         self.mwSession.commit()
         
@@ -404,11 +407,11 @@
         #****************************************************************
         # set up and clean database (Warning: this DESTROYS ALL DATA)
         #****************************************************************
-        project = 'wiki'
         self.session = db.get_session()
-        engine = db.get_mw_engine(project)
+        engine = db.get_mw_engine(mediawikiProject)
         db.MediawikiBase.metadata.create_all(engine, checkfirst=True)
-        self.mwSession = db.get_mw_session(project)
+        # mediawikiProject is a global defined in this file
+        self.mwSession = db.get_mw_session(mediawikiProject)
         DatabaseTest.tearDown(self)
     
     def tearDown(self):
@@ -494,15 +497,14 @@
 
 
 class DatabaseWithSurvivorCohortTest(unittest.TestCase):
-
+    
     def acquireDBHandles(self):
-        project = 'wiki'
         self.session = db.get_session()
-        engine = db.get_mw_engine(project)
+        engine = db.get_mw_engine(mediawikiProject)
         db.MediawikiBase.metadata.create_all(engine, checkfirst=True)
-        self.mwSession = db.get_mw_session(project)
+        self.mwSession = db.get_mw_session(mediawikiProject)
         self.survivors_namespace = 0
-
+    
     def clearWikimetrics(self):
         self.session.query(CohortWikiUser).delete()
         self.session.query(CohortUser).delete()
@@ -512,7 +514,7 @@
         self.session.query(PersistentReport).delete()
         self.session.commit()
         self.session.close()
-
+    
     def clearMediawiki(self):
         self.mwSession.query(Logging).delete()
         self.mwSession.query(Revision).delete()
@@ -520,7 +522,7 @@
         self.mwSession.query(Page).delete()
         self.mwSession.commit()
         self.mwSession.close()
-
+    
     def createUsers(self):
         mw_user_dan = MediawikiUser(user_name='Dan')
         mw_user_evan = MediawikiUser(user_name='Evan')
@@ -529,29 +531,31 @@
         self.mwSession.add_all([mw_user_dan, mw_user_evan,
                                mw_user_andrew, mw_user_diederik])
         self.mwSession.commit()
-
+        
         wu_dan = WikiUser(mediawiki_username='Dan', valid=True,
-                          mediawiki_userid=mw_user_dan.user_id, project='wiki')
-        wu_evan = WikiUser(mediawiki_username='Evan', valid=True,
-                           mediawiki_userid=mw_user_evan.user_id, 
project='wiki')
+                          mediawiki_userid=mw_user_dan.user_id, 
project=mediawikiProject)
+        wu_evan = WikiUser(mediawiki_username='Evan',
+                           valid=True, mediawiki_userid=mw_user_evan.user_id,
+                           project=mediawikiProject)
         wu_andrew = WikiUser(mediawiki_username='Andrew', valid=True,
-                             mediawiki_userid=mw_user_andrew.user_id, 
project='wiki')
+                             mediawiki_userid=mw_user_andrew.user_id,
+                             project=mediawikiProject)
         wu_diederik = WikiUser(mediawiki_username='Diederik', valid=True,
                                mediawiki_userid=mw_user_diederik.user_id,
-                               project='wiki')
+                               project=mediawikiProject)
         self.session.add_all([wu_dan, wu_evan, wu_andrew, wu_diederik])
         self.session.commit()
-
+        
         self.dan_id = wu_dan.id
         self.evan_id = wu_evan.id
         self.andrew_id = wu_andrew.id
         self.diederik_id = wu_diederik.id
-
+        
         self.mw_dan_id = mw_user_dan.user_id
         self.mw_evan_id = mw_user_evan.user_id
         self.mw_andrew_id = mw_user_andrew.user_id
         self.mw_diederik_id = mw_user_diederik.user_id
-
+    
     def createCohort(self):
         self.cohort = Cohort(
             name='demo-survivor-cohort',
@@ -562,7 +566,7 @@
         )
         self.session.add(self.cohort)
         self.session.commit()
-
+        
         ids = [self.dan_id, self.evan_id, self.andrew_id, self.diederik_id]
         for wiki_editor_id in ids:
             cohort_wiki_editor = CohortWikiUser(
@@ -571,33 +575,33 @@
             )
             self.session.add(cohort_wiki_editor)
             self.session.commit()
-
+    
     # update dan,evan,andrew,diederik user_registration timestamp
     def updateSurvivorRegistrationData(self):
         registration_date_dan    = format_date(datetime(2013, 1, 1))
         registration_date_evan   = format_date(datetime(2013, 1, 2))
         registration_date_andrew = format_date(datetime(2013, 1, 3))
-
+        
         self.mwSession.query(MediawikiUser) \
             .filter(MediawikiUser.user_id == self.mw_dan_id) \
             .update({"user_registration": registration_date_dan})
-
+        
         self.mwSession.query(MediawikiUser) \
             .filter(MediawikiUser.user_id == self.mw_evan_id) \
             .update({"user_registration": registration_date_evan})
-
+        
         self.mwSession.query(MediawikiUser) \
             .filter(MediawikiUser.user_id == self.mw_andrew_id) \
             .update({"user_registration": registration_date_andrew})
-
+    
     def createPageForSurvivors(self):
         self.page = Page(page_namespace=self.survivors_namespace,
                          page_title='SurvivorTestPage')
         self.mwSession.add_all([self.page])
         self.mwSession.commit()
-
+    
     def createRevisionsForSurvivors(self):
-
+        
         # create a revision for user with id uid at time t
         def createCustomRevision(uid, t):
             r = Revision(
@@ -610,20 +614,20 @@
             )
             self.mwSession.add(r)
             self.mwSession.commit()
-
+        
         createCustomRevision(self.mw_dan_id, datetime(2013, 1, 1))
         createCustomRevision(self.mw_dan_id, datetime(2013, 1, 2))
         createCustomRevision(self.mw_dan_id, datetime(2013, 1, 3))
-
+        
         createCustomRevision(self.mw_evan_id, datetime(2013, 1, 2))
         createCustomRevision(self.mw_evan_id, datetime(2013, 1, 3))
         createCustomRevision(self.mw_evan_id, datetime(2013, 1, 4))
-
+        
         createCustomRevision(self.mw_andrew_id, datetime(2013, 1, 3))
         createCustomRevision(self.mw_andrew_id, datetime(2013, 1, 4))
         createCustomRevision(self.mw_andrew_id, datetime(2013, 1, 5))
         createCustomRevision(self.mw_andrew_id, datetime(2013, 1, 6))
-
+    
     def setUp(self):
         self.acquireDBHandles()
         self.clearWikimetrics()
@@ -633,6 +637,6 @@
         self.updateSurvivorRegistrationData()
         self.createPageForSurvivors()
         self.createRevisionsForSurvivors()
-
+    
     def runTest(self):
         pass
diff --git a/tests/test_controllers/test_cohorts.py 
b/tests/test_controllers/test_cohorts.py
index d97ac8c..1454502 100644
--- a/tests/test_controllers/test_cohorts.py
+++ b/tests/test_controllers/test_cohorts.py
@@ -2,7 +2,7 @@
 import time
 from StringIO import StringIO
 from nose.tools import assert_equal, assert_true, assert_false, raises, nottest
-
+from wikimetrics.configurables import app
 from tests.fixtures import WebTest
 from wikimetrics.models import (
     Cohort, CohortUser, CohortUserRole, ValidateCohort,
@@ -64,8 +64,7 @@
     
     def test_detail_by_name_after_async_validate(self):
         self.helper_reset_validation()
-        
-        validate_cohort = ValidateCohort(self.cohort)
+        validate_cohort = ValidateCohort(self.cohort, app.config)
         async_result = validate_cohort.task.delay(validate_cohort)
         self.cohort.validation_queue_key = async_result.task_id
         async_result.get()
diff --git a/tests/test_core_classes.py b/tests/test_core_classes.py
index 72783bf..c03f578 100644
--- a/tests/test_core_classes.py
+++ b/tests/test_core_classes.py
@@ -1,23 +1,37 @@
 import os
 from unittest import TestCase
 from nose.tools import assert_equals, assert_true
-from wikimetrics.configurables import db
+from wikimetrics.configurables import db, parse_db_connection_string
 from wikimetrics.database import get_host_projects, get_host_projects_map
 
 
 class DatabaseSetupTest(TestCase):
-    
+    """
+    These tests access the "live" project_host_map
+    thus they make an http connection to get it.
+    The rest of the tests should work offline as they do not access
+    the function get_host_projects directly.
+    """
     def test_get_host_projects(self):
         (host_one, projects) = get_host_projects(1)
         assert_equals(host_one, 1)
-        assert_true('wiki' in projects)
+        assert_true('enwiki' in projects)
     
     def test_get_host_projects_map(self):
         project_host_map = get_host_projects_map()
-        assert_true('wiki' in project_host_map)
-        assert_true('arwiki' in project_host_map)
-        assert_true('commonswiki' in project_host_map)
-    
+        assert_true('enwiki' in project_host_map)
+        assert_true('dewiki' in project_host_map)
+        
+    def test_parse_db_connection_string(self):
+        url = "mysql://wikimetrics:wikimetrics@localhost/wikimetrics"
+        user, password, host, dbName = parse_db_connection_string(url)
+        assert_equals(user, 'wikimetrics')
+        assert_equals(password, 'wikimetrics')
+        assert_equals(host, 'localhost')
+        assert_equals(dbName, 'wikimetrics')
+        
+        
+       
     #def test_get_fresh_project_host_map(self):
         #project_host_map_cache_file = 'project_host_map.json'
         ## make sure any cached file is deleted
diff --git a/tests/test_metrics/test_revert_rate.py 
b/tests/test_metrics/test_revert_rate.py
index b0f22aa..6efec01 100644
--- a/tests/test_metrics/test_revert_rate.py
+++ b/tests/test_metrics/test_revert_rate.py
@@ -4,6 +4,7 @@
 from wikimetrics.metrics import RevertRate, TimeseriesChoices
 from wikimetrics.models import Cohort, MetricReport
 
+
 class RevertRateTest(DatabaseTest):
 
     def setUp(self):
@@ -20,6 +21,7 @@
                 [2, 4, 5],  # User B reverts user A's edit #3 back to edit #2.
             ],
         )
+
     @nottest
     def test_single_revert(self):
         metric = RevertRate(
diff --git a/tests/test_models/test_user.py b/tests/test_models/test_user.py
index c02104c..20806a0 100644
--- a/tests/test_models/test_user.py
+++ b/tests/test_models/test_user.py
@@ -31,5 +31,4 @@
         user = self.session.query(User).get(self.owner_user_id)
         user.authenticated = False
         self.session.commit()
-        
         assert_equal(user.is_anonymous(), True)
diff --git a/tests/test_models/test_validate_cohort.py 
b/tests/test_models/test_validate_cohort.py
index 836dd95..3179109 100644
--- a/tests/test_models/test_validate_cohort.py
+++ b/tests/test_models/test_validate_cohort.py
@@ -1,22 +1,28 @@
 import unittest
 from nose.tools import assert_equal, raises, assert_true, assert_false
-from tests.fixtures import WebTest, QueueDatabaseTest
+from wikimetrics.configurables import app, db
+from tests.fixtures import WebTest, QueueDatabaseTest, mediawikiProject
 from wikimetrics.controllers.forms import CohortUpload
 from wikimetrics.models import (
     MediawikiUser, Cohort, WikiUser, ValidateCohort, User,
     normalize_project,
 )
 
+# do not hardcode project name
+
+# add 'enwiki' as a valid project name for these tests
+db.config['PROJECT_HOST_NAMES'].append('enwiki')
+
 
 class ValidateCohortTest(WebTest):
     
     def test_normalize_project_shorthand(self):
         normal = normalize_project('en')
-        assert_equal(normal, 'wiki')
+        assert_equal(normal, 'enwiki')
     
     def test_normalize_project_uppercase(self):
-        normal = normalize_project('ENWIKI')
-        assert_equal(normal, 'wiki')
+        normal = normalize_project(mediawikiProject.upper())
+        assert_equal(normal, mediawikiProject)
     
     def test_normalize_project_nonexistent(self):
         normal = normalize_project('blah')
@@ -26,7 +32,7 @@
         self.helper_reset_validation()
         self.cohort.validate_as_user_ids = False
         self.session.commit()
-        v = ValidateCohort(self.cohort)
+        v = ValidateCohort(self.cohort, app.config)
         v.validate_records(self.session, self.cohort)
         
         assert_equal(self.cohort.validated, True)
@@ -44,7 +50,7 @@
         wikiusers[0].project = 'blah'
         wikiusers[1].mediawiki_username = 'blah'
         self.session.commit()
-        v = ValidateCohort(self.cohort)
+        v = ValidateCohort(self.cohort, app.config)
         v.validate_records(self.session, self.cohort)
         
         assert_equal(self.cohort.validated, True)
@@ -63,7 +69,7 @@
 
 
 class ValidateCohortQueueTest(QueueDatabaseTest):
-
+    
     def setUp(self):
         QueueDatabaseTest.setUp(self)
         
@@ -77,23 +83,24 @@
         self.owner_user_id = owner_user.id
     
     def test_small_cohort(self):
+        
         cohort_upload = CohortUpload()
         cohort_upload.name.data = 'small_cohort'
-        cohort_upload.project.data = 'wiki'
+        cohort_upload.project.data = mediawikiProject
         cohort_upload.records = [
             # two existing users
-            {'username': 'Editor test-specific-0', 'project': 'wiki'},
-            {'username': 'Editor test-specific-1', 'project': 'wiki'},
+            {'username': 'Editor test-specific-0', 'project': 
mediawikiProject},
+            {'username': 'Editor test-specific-1', 'project': 
mediawikiProject},
             # one invalid username
-            {'username': 'Nonexisting', 'project': 'wiki'},
+            {'username': 'Nonexisting', 'project': mediawikiProject},
             # one user with invalid project
             {'username': 'Nonexisting2', 'project': 'Nonexisting'},
         ]
-
-        v = ValidateCohort.from_upload(cohort_upload, self.owner_user_id)
+        
+        v = ValidateCohort.from_upload(cohort_upload, self.owner_user_id, 
app.config)
         v.task.delay(v).get()
         self.session.commit()
-
+        
         assert_equal(self.session.query(WikiUser).filter(
             WikiUser.mediawiki_username == 'Editor 
test-specific-0').one().valid, True)
         assert_equal(self.session.query(WikiUser).filter(
@@ -109,7 +116,7 @@
         cohort_upload.project.data = 'wiki'
         cohort_upload.records = [{'fake': 'dict'}]
         
-        v = ValidateCohort.from_upload(cohort_upload, self.owner_user_id)
+        v = ValidateCohort.from_upload(cohort_upload, self.owner_user_id, 
app.config)
         assert_equal(v, None)
 
 
@@ -117,5 +124,5 @@
     
     def test_repr(self):
         cohort = Cohort(id=1)
-        v = ValidateCohort(cohort)
+        v = ValidateCohort(cohort, app.config)
         assert_equal(str(v), '<ValidateCohort("1")>')
diff --git a/tests/test_utils/test_one_off_functions.py 
b/tests/test_utils/test_one_off_functions.py
index 959acc5..407d15c 100644
--- a/tests/test_utils/test_one_off_functions.py
+++ b/tests/test_utils/test_one_off_functions.py
@@ -75,7 +75,7 @@
         assert_equal(project, 'en')
     
     def test_project_name_for_link_with_wiki(self):
-        project = project_name_for_link('wiki')
+        project = project_name_for_link('enwiki')
         assert_equal(project, 'en')
     
     def test_link_to_user_page(self):
diff --git a/wikimetrics/configurables.py b/wikimetrics/configurables.py
index 96ec252..d70b5ae 100644
--- a/wikimetrics/configurables.py
+++ b/wikimetrics/configurables.py
@@ -4,6 +4,23 @@
 import subprocess
 
 
+def parse_db_connection_string(urlConnectionString):
+    """
+    From a url like: mysql://wikimetrics:wikimetrics@localhost/wikimetrics
+    extracts user, password, host, dbName
+    """
+    from urlparse import urlparse
+    parsed = urlparse(urlConnectionString)
+    # results in
+    # ParseResult(scheme='mysql', netloc='root:vagrant@localhost',
+    # path='/wiki', params='', query='', fragment='')
+    netloc = parsed.netloc
+    user = netloc.split(":")[0]
+    password, host = netloc.split(":")[1].split("@")
+    dbName = parsed.path.split("/")[1]
+    return user, password, host, dbName
+
+
 # TODO: does not work in labs environment
 def create_object_from_config_file(path):
     dir, fname = os.path.split(path)
@@ -147,6 +164,10 @@
 # TODO: look into making a single config object that has empty sections if
 # some roles are not used (or maybe dependency injection)
 def config_db(args):
+    """
+    Initializes the config object with what's passed in, further splits the 
config
+    to get a user, password, host and dbName
+    """
     from .database import Database
     
     db_config = create_dict_from_text_config_file(args.db_config)
@@ -155,6 +176,26 @@
         db_config.__dict__.update(config_override)
     
     global db
+    user, password, host, dbName = parse_db_connection_string(
+        db_config["MEDIAWIKI_ENGINE_URL_TEMPLATE"])
+    db_config["MEDIAWIKI"] = {}
+    db_config["MEDIAWIKI"]["USER"] = user
+    db_config["MEDIAWIKI"]["PASSWORD"] = password
+    db_config["MEDIAWIKI"]["HOST"] = host
+    db_config["MEDIAWIKI"]["DBNAME"] = dbName
+    
+    user, password, host, dbName = parse_db_connection_string(
+        db_config["WIKIMETRICS_ENGINE_URL"])
+    db_config["WIKIMETRICS"] = {}
+    db_config["WIKIMETRICS"]["USER"] = user
+    db_config["WIKIMETRICS"]["PASSWORD"] = password
+    db_config["WIKIMETRICS"]["HOST"] = host
+    db_config["WIKIMETRICS"]["DBNAME"] = dbName
+    
+    # test setup will override this setting if needed
+    if db_config["DEBUG"]:
+        db_config["PROJECT_HOST_NAMES"] = [db_config["MEDIAWIKI"]["DBNAME"]]
+    
     db = Database(db_config)
 
 
diff --git a/wikimetrics/controllers/cohorts.py 
b/wikimetrics/controllers/cohorts.py
index 4addb40..914d8c9 100644
--- a/wikimetrics/controllers/cohorts.py
+++ b/wikimetrics/controllers/cohorts.py
@@ -163,7 +163,7 @@
                 flash('That Cohort name is already taken.', 'warning')
             else:
                 form.parse_records()
-                vc = ValidateCohort.from_upload(form, current_user.id)
+                vc = ValidateCohort.from_upload(form, current_user.id, 
app.config)
                 vc.task.delay(vc)
                 return redirect('{0}#{1}'.format(
                     url_for('cohorts_index'),
@@ -175,7 +175,7 @@
     
     return render_template(
         'csv_upload.html',
-        projects=json.dumps(sorted(db.project_host_map.keys())),
+        projects=json.dumps(sorted(db.get_project_host_map().keys())),
         form=form,
     )
 
@@ -201,7 +201,7 @@
 @app.route('/cohorts/validate/project')
 def validate_cohort_project_allowed():
     project = request.args.get('project')
-    valid = project in db.project_host_map
+    valid = project in db.get_project_host_map()
     return json.dumps(valid)
 
 
@@ -212,7 +212,8 @@
     try:
         cohort = Cohort.get_safely(session, current_user.id, by_id=cohort_id)
         name = cohort.name
-        vc = ValidateCohort(cohort)
+        # TODO we need some kind of global config that is not db specific
+        vc = ValidateCohort(cohort, app.config)
         vc.task.delay(vc)
         return json_response(message='Validating cohort "{0}"'.format(name))
     except Unauthorized:
diff --git a/wikimetrics/database.py b/wikimetrics/database.py
index 6733080..e9c5def 100644
--- a/wikimetrics/database.py
+++ b/wikimetrics/database.py
@@ -40,16 +40,15 @@
 
 
 def get_host_projects_map():
+    project_host_map = {}
     # TODO: these numbers are hardcoded, is that ok?
     num_hosts = 7
     host_projects = map(get_host_projects, range(1, num_hosts + 1))
-    project_host_map = {}
     host_fmt = 's{0}'
     for host_id, projects in host_projects:
         host = host_fmt.format(host_id)
         for project in projects:
             project_host_map[project] = host
-    
     return project_host_map
 
 
@@ -62,22 +61,25 @@
     
     def __init__(self, config):
         """
-        Initializes the config object with what's passed in
         Initializes the declarative bases that are used throughout the project.
         Initializes the empty engines and sessionmakers that support
         `get_session` and `get_mw_session`.
         """
+        
         self.config = config
+        
         self.WikimetricsBase = declarative_base(cls=SerializableBase)
         self.MediawikiBase = declarative_base(cls=SerializableBase)
-
+        
         self.wikimetrics_engine = None
         self.wikimetrics_sessionmaker = None
         
         self.mediawiki_engines = {}
         self.mediawiki_sessionmakers = {}
-        self.project_host_map = self.get_project_host_map(usecache=True)
-    
+        
+        # we instantiate project_host_map lazily
+        self._project_host_map = None
+
     def get_session(self):
         """
         On the first run, instantiates the Wikimetrics session maker
@@ -98,7 +100,7 @@
             import wikimetrics.models
             self.WikimetricsBase.metadata.create_all(
                 self.wikimetrics_engine,
-                checkfirst=True
+                checkfirst=True,
             )
             self.wikimetrics_sessionmaker = 
sessionmaker(self.wikimetrics_engine)
         
@@ -125,7 +127,7 @@
                     engine,
                     checkfirst=True
                 )
-
+            
             # Assuming that we're not using the real mediawiki databases in 
debug mode,
             # we have to create the tables
             #if self.config['DEBUG']:
@@ -149,9 +151,6 @@
             return self.mediawiki_engines[project]
         else:
             engine_template = self.config['MEDIAWIKI_ENGINE_URL_TEMPLATE']
-            # This allows vagrant's wiki database to be used and configured 
separately
-            if self.config['DEBUG'] and project == 'wiki':
-                engine_template = self.config['DEV_ENGINE_URL_TEMPLATE']
             
             engine = create_engine(
                 engine_template.format(project),
@@ -164,30 +163,44 @@
     def get_project_host_map(self, usecache=True):
         """
         Retrieves the list of mediawiki projects from noc.wikimedia.org.
+        In development or testing (config DEBUG is set) this does not
+        access the network to verify project names; the names come
+        from the PROJECT_HOST_NAMES config setting instead.
+        
+        Note that the project host map is built lazily:
+        not at the time we construct the object
+        but the first time we request it
         
         Parameters:
             usecache    : defaults to True and uses a local cache if available
+        
         """
-        cache_name = 'project_host_map.json'
-        if not exists(cache_name) or not usecache:
+        if self._project_host_map == None or usecache == False:
+            project_host_map = {}
             
-            project_host_map = get_host_projects_map()
-            if usecache and os.access(cache_name, os.W_OK):
-                try:
-                    json.dump(project_host_map, open(cache_name, 'w'))
-                except:
-                    print('No rights to write project host map cache 
{0}'.format(
-                        os.path.abspath(cache_name)
-                    ))
-        elif os.access(cache_name, os.R_OK):
-            project_host_map = json.load(open(cache_name))
-        else:
-            raise Exception('Project host map could not be fetched or read')
-        
-        if self.config['DEBUG']:
-            project_host_map['wiki'] = 'mediawiki-vagrant'
-        
-        return project_host_map
+            if self.config.get("DEBUG"):
+                # tests/__init__.py overrides this setting if needed
+                for p in self.config.get("PROJECT_HOST_NAMES"):
+                    project_host_map[p] = 'localhost'
+            else:
+                # TODO Two processes could come here at the same time. LOCK!
+                cache_name = 'project_host_map.json'
+                if not exists(cache_name) or not usecache:
+                    project_host_map = get_host_projects_map()
+                    if usecache and os.access(cache_name, os.W_OK):
+                        try:
+                            json.dump(project_host_map, open(cache_name, 'w'))
+                        except:
+                            print('No rights to write project host map cache 
{0}'.format(
+                                os.path.abspath(cache_name)
+                            ))
+                elif os.access(cache_name, os.R_OK):
+                    project_host_map = json.load(open(cache_name))
+                else:
+                    raise Exception('Project host map could not be fetched or 
read')
+            
+            self._project_host_map = project_host_map
+        return self._project_host_map
 
 
 @event.listens_for(Pool, "checkout")
@@ -204,7 +217,7 @@
         # optional - dispose the whole pool
         # instead of invalidating one at a time
         # connection_proxy._pool.dispose()
-
+        
         # raise DisconnectionError - pool will try
         # connecting again up to three times before raising.
         raise exc.DisconnectionError()
diff --git a/wikimetrics/models/mediawiki/custom_columns.py 
b/wikimetrics/models/mediawiki/custom_columns.py
index 35adae9..d9aeece 100644
--- a/wikimetrics/models/mediawiki/custom_columns.py
+++ b/wikimetrics/models/mediawiki/custom_columns.py
@@ -32,6 +32,6 @@
         To unbundle we have to detect unicodestrings that are set as mysql 
defaults
         they are not represented by singleton None
         """
-        if not value or value == 
u'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00':
+        if not value or value == 
u"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00":
             return None
         return parse_date(value)
diff --git a/wikimetrics/models/mediawiki/logging.py 
b/wikimetrics/models/mediawiki/logging.py
index 27f20e1..d8de79f 100644
--- a/wikimetrics/models/mediawiki/logging.py
+++ b/wikimetrics/models/mediawiki/logging.py
@@ -12,12 +12,12 @@
     log_id = Column(Integer, primary_key=True)
     log_type = Column(String(32), nullable=False, default='')
     log_action = Column(String(32), nullable=False, default='')
-    log_timestamp = Column(MediawikiTimestamp,nullable=False, 
default=u'19700101000000')
-    log_user = Column(Integer, ForeignKey('user.user_id'), 
nullable=False,default=0)
+    log_timestamp = Column(MediawikiTimestamp, nullable=False, 
default=u'19700101000000')
+    log_user = Column(Integer, ForeignKey('user.user_id'), nullable=False, 
default=0)
     log_namespace = Column(Integer, nullable=False, default=0)
     log_title = Column(String(255), nullable=False, default='')
     log_comment = Column(String(255), nullable=False, default='')
-    log_params= Column(BLOB, nullable=False,default='')
+    log_params = Column(BLOB, nullable=False, default='')
     log_deleted = Column(Boolean, nullable=False, default=0)
     log_user_text = Column(String(255), nullable=False, default='')
-    log_page = Column(Integer, ForeignKey('page.page_id'))
\ No newline at end of file
+    log_page = Column(Integer, ForeignKey('page.page_id'))
diff --git a/wikimetrics/models/mediawiki/revision.py 
b/wikimetrics/models/mediawiki/revision.py
index c9e4046..9956b85 100644
--- a/wikimetrics/models/mediawiki/revision.py
+++ b/wikimetrics/models/mediawiki/revision.py
@@ -15,7 +15,7 @@
     rev_user = Column(Integer, ForeignKey('user.user_id'), nullable=False, 
default=0)
     rev_user_text = Column(String(255), nullable=False, default='')
     rev_timestamp = Column(MediawikiTimestamp, nullable=False,
-            default=u'\0\0\0\0\0\0\0\0\0\0\0\0\0\0')
+                           default=u'\0\0\0\0\0\0\0\0\0\0\0\0\0\0')
     rev_minor_edit = Column(Integer, nullable=False, default='0')
     # this might be a boolean but it gets overflown if set that way
     rev_deleted = Column(Integer)
diff --git a/wikimetrics/models/mediawiki/user.py 
b/wikimetrics/models/mediawiki/user.py
index 177ca0f..ecb6bd4 100644
--- a/wikimetrics/models/mediawiki/user.py
+++ b/wikimetrics/models/mediawiki/user.py
@@ -18,9 +18,9 @@
     user_newpass_time = Column(MediawikiTimestamp)
     user_email = Column(String(255), nullable=False, default='')
     user_touched = Column(MediawikiTimestamp, nullable=False,
-        default=u'\0\0\0\0\0\0\0\0\0\0\0\0\0\0')
+                          default=u'\0\0\0\0\0\0\0\0\0\0\0\0\0\0')
     user_token = Column(String(255), nullable=False,
-        
default=u'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0')
+                        
default=u'''\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0''')
     user_email_authenticated = Column(MediawikiTimestamp)
     user_email_token = Column(String(255))
     user_email_token_expires = Column(MediawikiTimestamp)
diff --git a/wikimetrics/models/validate_cohort.py 
b/wikimetrics/models/validate_cohort.py
index 6b24fca..45fe040 100644
--- a/wikimetrics/models/validate_cohort.py
+++ b/wikimetrics/models/validate_cohort.py
@@ -32,10 +32,12 @@
     """
     task = async_validate
     
-    def __init__(self, cohort):
+    def __init__(self, cohort, config):
         """
         Parameters:
             cohort  : an existing cohort
+            config  : global config, we need to know
+                if we are on dev or testing to validate project name
         
         Instantiates with these properties:
             cohort_id               : id of an existing cohort with validated 
== False
@@ -44,9 +46,10 @@
         """
         self.cohort_id = cohort.id
         self.validate_as_user_ids = cohort.validate_as_user_ids
+        self.config = config
     
     @classmethod
-    def from_upload(cls, cohort_upload, owner_user_id):
+    def from_upload(cls, cohort_upload, owner_user_id, config):
         """
         Create a new cohort and validate a list of uploaded users for it
         
@@ -91,7 +94,7 @@
                 ]
             )
             session.commit()
-            return cls(cohort)
+            return cls(cohort, config)
         except Exception, e:
             app.logger.error(str(e))
             return None
@@ -122,6 +125,7 @@
             session : an active wikimetrics db session to use
             cohort  : the cohort to validate; must belong to session
         """
+        
         # reset the cohort validation status so it can't be used for reports
         cohort.validated = False
         session.execute(
@@ -203,14 +207,17 @@
 def normalize_project(project):
     """
     Decides whether the name of the project is a valid one
+    There are differences in db names in local setup versus vagrant setup
+    While local setup uses enwiki mediawiki vagrant uses wiki
+    We let 'wiki' be an acceptable name in development.
     """
     project = project.strip().lower()
-    if project in db.project_host_map:
+    if project in db.get_project_host_map():
         return project
     else:
         # try adding wiki to end
         new_proj = project + 'wiki'
-        if new_proj not in db.project_host_map:
+        if new_proj not in db.get_project_host_map():
             return None
         else:
             return new_proj
diff --git a/wikimetrics/templates/forms/metric_configuration.html 
b/wikimetrics/templates/forms/metric_configuration.html
index 0c4da93..34489a0 100644
--- a/wikimetrics/templates/forms/metric_configuration.html
+++ b/wikimetrics/templates/forms/metric_configuration.html
@@ -31,7 +31,7 @@
                         <li>Examples:
                             <a target="_blank" 
href="https://de.wikipedia.org/wiki/Wikipedia:Namespace">dewiki</a>,
                             <a target="_blank" 
href="https://commons.wikimedia.org/wiki/Help:Namespaces">commons</a>,
-                            <a target="_blank" 
href="https://en.wikipedia.org/wiki/Wikipedia:Namespace">wiki</a></li>
+                            <a target="_blank" 
href="https://en.wikipedia.org/wiki/Wikipedia:Namespace">enwiki</a></li>
                     </ul>
                 </div>
             {% endif %}

-- 
To view, visit https://gerrit.wikimedia.org/r/110945
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I448516e983185266f8c54cf102641cea6de2807a
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Nuria <nu...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to