Lokal Profil has uploaded a new change for review. https://gerrit.wikimedia.org/r/295594
Change subject: Add wikidata connection to monuments_all and Qid tester to updater ...................................................................... Add wikidata connection to monuments_all and Qid tester to updater Also adds monuments_ru_(ru) as a first test case since they already have 101012 ids in their list. Bug: T55808 Change-Id: I8fb3db59e550826a86f2a85d40f140b7affba316 --- M erfgoedbot/monuments_config.py M erfgoedbot/sql/fill_table_monuments_all.sql M erfgoedbot/update_database.py M tests/test_monuments_config.py M tests/test_update_database.py 5 files changed, 111 insertions(+), 3 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage refs/changes/94/295594/1 diff --git a/erfgoedbot/monuments_config.py b/erfgoedbot/monuments_config.py index a8bbc36..e161391 100755 --- a/erfgoedbot/monuments_config.py +++ b/erfgoedbot/monuments_config.py @@ -6755,6 +6755,11 @@ 'dest': u'registrant_url', 'conv': u'generateRegistrantUrl', }, + { + 'source': u'wdid', + 'dest': u'wd_item', + 'check': u'checkWD', + }, ], }, ('se-bbr', 'sv'): { # BBR Monuments in Sweden in Swedish diff --git a/erfgoedbot/sql/fill_table_monuments_all.sql b/erfgoedbot/sql/fill_table_monuments_all.sql index 587ee2c..0cbd01f 100644 --- a/erfgoedbot/sql/fill_table_monuments_all.sql +++ b/erfgoedbot/sql/fill_table_monuments_all.sql @@ -30,6 +30,7 @@ `lat_int` smallint(6) DEFAULT NULL, `lon_int` smallint(6) DEFAULT NULL, `image` varchar(255) NOT NULL DEFAULT '', + `wd_item` varchar(255) DEFAULT NULL, `commonscat` varchar(255) NOT NULL DEFAULT '', `source` varchar(510) NOT NULL DEFAULT '', `changed` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, @@ -2292,7 +2293,7 @@ /* Russia in Russian */ REPLACE INTO `monuments_all_tmp` ( - `country`, `lang`, `project`, `id`, `adm0`, `adm1`, `adm2`, `adm3`, `adm4`, `name`, `address`, `municipality`, `lat`, `lon`, `lat_int`, `lon_int`, `image`, `commonscat`, `source`, `changed`, `monument_article`, `registrant_url` + `country`, `lang`, `project`, `id`, `adm0`, `adm1`, `adm2`, `adm3`, `adm4`, `name`, `address`, `municipality`, `lat`, `lon`, `lat_int`, `lon_int`, `image`, `commonscat`, `source`, `changed`, `monument_article`, `registrant_url`, `wd_item` ) SELECT 'ru' AS `country`, 'ru' AS `lang`, @@ -2315,7 +2316,8 @@ `source` AS `source`, `changed` AS `changed`, `monument_article` AS `monument_article`, - `registrant_url` AS `registrant_url` + `registrant_url` AS `registrant_url`, + `wd_item` AS `wd_item` FROM `monuments_ru_(ru)`; /* Sweden (BBR Monuments) in Swedish */ diff --git a/erfgoedbot/update_database.py b/erfgoedbot/update_database.py index f337d2a..f2e6c94 100755 --- a/erfgoedbot/update_database.py +++ b/erfgoedbot/update_database.py @@ -128,12 +128,35 @@ return True +def is_int(s): + """Check if a string is a valid int.""" + try: + int(s) + return True + except (ValueError, TypeError): + return False + + +def check_wikidata(wd_item, monumentKey, sourcePage): + """Check that a value is a potential wikidata entity.""" + if len(wd_item): + if wd_item.startswith('Q') and is_int(wd_item[1:]): + return True + else: + errorMsg = u"Invalid wikidata value: %s for monument %s" % ( + wd_item, monumentKey) + reportDataError(errorMsg, sourcePage, monumentKey) + return False + + def run_check(check, fieldValue, monumentKey, countryconfig, sourcePage): """Run a named check.""" if check == 'checkLat': return checkLat(fieldValue, monumentKey, countryconfig, sourcePage) elif check == 'checkLon': return checkLon(fieldValue, monumentKey, countryconfig, sourcePage) + elif check == 'checkWD': + return check_wikidata(fieldValue, monumentKey, sourcePage) else: raise pywikibot.Error('Un-defined check in config for %s: %s' % (countryconfig.get('table'), check)) diff --git a/tests/test_monuments_config.py b/tests/test_monuments_config.py index 47d0ad1..c3e4843 100644 --- a/tests/test_monuments_config.py +++ b/tests/test_monuments_config.py @@ -148,7 +148,7 @@ def test_monuments_config_known_checkers(self): """Ensure the only known checkers are used in field entries.""" - recognized = ['checkLon', 'checkLat'] + recognized = ['checkLon', 'checkLat', 'checkWD'] for key, data in config.countries.iteritems(): self.set_label(key) for field in data['fields']: diff --git a/tests/test_update_database.py b/tests/test_update_database.py index be1be37..783157c 100644 --- a/tests/test_update_database.py +++ b/tests/test_update_database.py @@ -282,6 +282,21 @@ update_database.updateMonument(self.contents, self.source, self.country_config, None, self.mock_cursor, self.mock_page) mock_checkLon.assert_called_once_with(lon, self.monumentKey, self.country_config, self.mock_page) + def test_trigger_checkWD(self): + self.country_config['fields'].append( + { + 'source': u'wd_item', + 'dest': u'wd_item', + 'check': u'checkWD', + } + ) + wd_item = 'Q123' + self.contents[u'wd_item'] = wd_item + + with mock.patch('erfgoedbot.update_database.check_wikidata', autospec=True) as mock_check_wikidata: + update_database.updateMonument(self.contents, self.source, self.country_config, None, self.mock_cursor, self.mock_page) + mock_check_wikidata.assert_called_once_with(wd_item, self.monumentKey, self.mock_page) + def test_trigger_unknown_check(self): self.country_config['fields'].append( { @@ -454,3 +469,66 @@ lon = '13.37' result = update_database.checkLon(lon, self.monumentKey, self.country_config, self.mock_page) self.assertEqual(result, True) + + +class TestCheckWikidata(TestUpdateDatabaseBase): + + def setUp(self): + super(TestCheckWikidata, self).setUp() + self.monumentKey = 'Some-key' + + def test_empty_wd_item(self): + wd_item = '' + result = update_database.check_wikidata(wd_item, self.monumentKey, self.mock_page) + self.assertEqual(result, None) + + def test_non_Q_part(self): + wd_item = 'P123' + expected_errorMsg = u"Invalid wikidata value: %s for monument %s" % ( + wd_item, self.monumentKey) + with mock.patch('erfgoedbot.update_database.reportDataError', autospec=True) as mock_reportDataError: + result = update_database.check_wikidata(wd_item, self.monumentKey, self.mock_page) + mock_reportDataError.assert_called_once_with(expected_errorMsg, self.mock_page, self.monumentKey) + self.assertEqual(result, False) + + def test_non_integer_part(self): + wd_item = 'Que?' + expected_errorMsg = u"Invalid wikidata value: %s for monument %s" % ( + wd_item, self.monumentKey) + with mock.patch('erfgoedbot.update_database.reportDataError', autospec=True) as mock_reportDataError: + result = update_database.check_wikidata(wd_item, self.monumentKey, self.mock_page) + mock_reportDataError.assert_called_once_with(expected_errorMsg, self.mock_page, self.monumentKey) + self.assertEqual(result, False) + + def test_valid_wd_item(self): + wd_item = 'Q123' + result = update_database.check_wikidata(wd_item, self.monumentKey, self.mock_page) + self.assertEqual(result, True) + + +class TestIsInt(TestUpdateDatabaseBase): + + def test_empty_string_fail(self): + s = '' + result = update_database.is_int(s) + self.assertEqual(result, False) + + def test_None_fail(self): + s = None + result = update_database.is_int(s) + self.assertEqual(result, False) + + def test_random_string_fail(self): + s = 'random_string' + result = update_database.is_int(s) + self.assertEqual(result, False) + + def test_float_fail(self): + s = '123.456' + result = update_database.is_int(s) + self.assertEqual(result, False) + + def test_valid_int_succeed(self): + s = '123' + result = update_database.is_int(s) + self.assertEqual(result, True) -- To view, visit https://gerrit.wikimedia.org/r/295594 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I8fb3db59e550826a86f2a85d40f140b7affba316 Gerrit-PatchSet: 1 Gerrit-Project: labs/tools/heritage Gerrit-Branch: master Gerrit-Owner: Lokal Profil <lokal.pro...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits