[MediaWiki-commits] [Gerrit] operations/mediawiki-config[master]: Drop the medlem user group and editallpages user right

2018-01-18 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/404942 )

Change subject: Drop the medlem user group and editallpages user right
..

Drop the medlem user group and editallpages user right

This group and right are only used by sewikimedia and no longer
fill a function. Rather they interfere with the available grants.

Bug: T184981
Change-Id: I337e518977c79c62f685a82c8a4a14241a561edc
---
M wmf-config/InitialiseSettings.php
1 file changed, 4 insertions(+), 50 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/mediawiki-config 
refs/changes/42/404942/1

diff --git a/wmf-config/InitialiseSettings.php 
b/wmf-config/InitialiseSettings.php
index d72a15a..7952b83 100644
--- a/wmf-config/InitialiseSettings.php
+++ b/wmf-config/InitialiseSettings.php
@@ -8042,6 +8042,10 @@
'trwikimedia' => [ '*' => [
'edit' => false,
] ],
+   'sewikimedia' => [ '*' => [
+   'edit' => false,
+   ] ],
+   
 
// Miscellaneous
'+arbcom_enwiki' => [
@@ -9369,29 +9373,6 @@
'+sdwiki' => [
'autopatrolled' => [ 'autopatrol' => true ], // T177141
],
-   '+sewikimedia' => [
-   '*' => [
-   'edit' => false,
-   'editallpages' => false,
-   ],
-   'user' => [
-   'editallpages' => true, // T41671
-   ],
-   'sysop' => [ // T63947
-   'editallpages' => true,
-   ],
-   'medlem' => [
-   'move' => true,
-   'move-subpages' => true,
-   'read' => true,
-   'edit' => true,
-   'createpage' => true,
-   'createtalk' => true,
-   'minoredit' => true,
-   'purge' => true,
-   'editallpages' => true,
-   ],
-   ],
'+simplewiki' => [
'flood' => [ 'bot' => true ],
'rollbacker' => [ 'rollback' => true, ],
@@ -10385,10 +10366,6 @@
'+sdwiki' => [
'sysop' => [ 'autopatrolled' ], // T177141
],
-   '+sewikimedia' => [
-   'bureaucrat' => [ 'medlem' ],
-   'sysop' => [ 'medlem' ],
-   ],
'+simplewiki' => [
'bureaucrat' => [ 'rollbacker', 'transwiki', 'patroller' ],
'sysop' => [ 'rollbacker', 'flood', 'patroller', 'uploader', ], 
// T127826
@@ -11136,9 +3,6 @@
],
'+sdwiki' => [
'sysop' => [ 'autopatrolled' ], // T177141
-   ],
-   '+sewikimedia' => [
-   'bureaucrat' => [ 'sysop', 'bureaucrat', 'medlem' ],
],
'+shwiki' => [
'bureaucrat' => [
@@ -12392,26 +12366,6 @@
],
'ruwiki' => [
106 => [ 'autoconfirmed' ],
-   ],
-   'sewikimedia' => [
-   NS_MAIN => [ 'editallpages' ],
-   NS_TALK => [ 'editallpages' ],
-   NS_USER => [ 'editallpages' ],
-   NS_USER_TALK => [ 'editallpages' ],
-   NS_PROJECT => [ 'editallpages' ],
-   NS_PROJECT_TALK => [ 'editallpages' ],
-   NS_FILE => [ 'editallpages' ],
-   NS_FILE_TALK => [ 'editallpages' ],
-   NS_MEDIAWIKI => [ 'editallpages' ],
-   NS_MEDIAWIKI_TALK => [ 'editallpages' ],
-   NS_TEMPLATE => [ 'editallpages' ],
-   NS_TEMPLATE_TALK => [ 'editallpages' ],
-   NS_HELP => [ 'editallpages' ],
-   NS_HELP_TALK => [ 'editallpages' ],
-   NS_CATEGORY => [ 'editallpages' ],
-   NS_CATEGORY_TALK => [ 'editallpages' ],
-   100 => [ 'editallpages' ],
-   101 => [ 'editallpages' ],
],
'+wikidatawiki' => [
122 => [ 'query-update' ],  // Query namespace per T51001

-- 
To view, visit https://gerrit.wikimedia.org/r/404942
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I337e518977c79c62f685a82c8a4a14241a561edc
Gerrit-PatchSet: 1
Gerrit-Project: operations/mediawiki-config
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] mediawiki/core[master]: Expose PageLanguageDir

2017-11-29 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/394218 )

Change subject: Expose PageLanguageDir
..

Expose PageLanguageDir

Bug: T181684
Change-Id: Id6762ae758a83c67916fa37be184d5b35c870cdc
---
M includes/OutputPage.php
1 file changed, 1 insertion(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/18/394218/1

diff --git a/includes/OutputPage.php b/includes/OutputPage.php
index a5f9c18..43071b7 100644
--- a/includes/OutputPage.php
+++ b/includes/OutputPage.php
@@ -3197,6 +3197,7 @@
'wgCategories' => $this->getCategories(),
'wgBreakFrames' => $this->getFrameOptions() == 'DENY',
'wgPageContentLanguage' => $lang->getCode(),
+   'wgPageLanguageDir' => $lang->getDir(),
'wgPageContentModel' => $title->getContentModel(),
'wgSeparatorTransformTable' => 
$compactSeparatorTransTable,
'wgDigitTransformTable' => $compactDigitTransTable,

-- 
To view, visit https://gerrit.wikimedia.org/r/394218
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id6762ae758a83c67916fa37be184d5b35c870cdc
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: [BUGFIX]Correct mapping of Commons template for Iraq

2017-11-21 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/392662 )

Change subject: [BUGFIX]Correct mapping of Commons template for Iraq
..

[BUGFIX]Correct mapping of Commons template for Iraq

Bug: T180850
Change-Id: I5dcb788e520ace31bbc5cc3fb098b85b9e73a4e5
---
M erfgoedbot/monuments_config/iq_ar.json
1 file changed, 2 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/62/392662/1

diff --git a/erfgoedbot/monuments_config/iq_ar.json 
b/erfgoedbot/monuments_config/iq_ar.json
index 63c9763..1c42e08 100644
--- a/erfgoedbot/monuments_config/iq_ar.json
+++ b/erfgoedbot/monuments_config/iq_ar.json
@@ -11,7 +11,7 @@
 "primkey": "id",
 "headerTemplate": "رأس معلم العراق",
 "rowTemplate": "صف معلم العراق",
-"commonsTemplate": "Cultural heritage Iraq",
+"commonsTemplate": "Cultural Heritage Iraq",
 "commonsTrackerCategory": "Cultural heritage monuments in Iraq with known 
IDs",
 "commonsCategoryBase": "Cultural heritage monuments in Iraq",
 "unusedImagesPage": "مستخدم:Bachounda/Unusued_images_Iraq",
@@ -130,4 +130,4 @@
 "type": "Field"
 }
 }
-}
\ No newline at end of file
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/392662
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I5dcb788e520ace31bbc5cc3fb098b85b9e73a4e5
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: [BUGFIX]Fix unsafe operation on str that should use unicode

2017-11-21 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/392660 )

Change subject: [BUGFIX]Fix unsafe operation on str that should use unicode
..

[BUGFIX]Fix unsafe operation on str that should use unicode

The fixes were picked from: I2cebd0dfae63ce0022013ee9b010ccc4e4a540d9

Bug: T180692
Change-Id: I0242e1ffd3cb158e09c62260a6ab21688168807e
---
M erfgoedbot/categorize_images.py
M erfgoedbot/monument_tables.py
2 files changed, 5 insertions(+), 5 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/60/392660/1

diff --git a/erfgoedbot/categorize_images.py b/erfgoedbot/categorize_images.py
index f7c61e0..0310850 100644
--- a/erfgoedbot/categorize_images.py
+++ b/erfgoedbot/categorize_images.py
@@ -531,7 +531,7 @@
 total_images = '---'
 
 if row.get('cat'):
-cat_link = '[[:Category:{0}]]'.format(row['cat'])
+cat_link = u'[[:Category:{0}]]'.format(row['cat'])
 pages_in_cat = '{{PAGESINCATEGORY:%s|files}}' % row['cat']
 
 if row.get('template'):
@@ -586,7 +586,7 @@
 def skip(country_code, lang, country_config):
 """Return a outputStatistics compatible summary for a skipped country."""
 site = pywikibot.Site(u'commons', u'commons')
-commons_category_base = pywikibot.Category(site, "{ns}:{cat}".format(
+commons_category_base = pywikibot.Category(site, u'{ns}:{cat}'.format(
 ns=site.namespace(14), cat=country_config.get('commonsCategoryBase')))
 commons_template = country_config.get('commonsTemplate')
 return {
diff --git a/erfgoedbot/monument_tables.py b/erfgoedbot/monument_tables.py
index a6fd148..940bdca 100755
--- a/erfgoedbot/monument_tables.py
+++ b/erfgoedbot/monument_tables.py
@@ -28,11 +28,11 @@
 sql = process_classic_config(country_config)
 except Exception as e:
 raise Exception(
-'{exception} for countrycode: {country}, lang: {lang}'.format(
+u'{exception} for countrycode: {country}, lang: {lang}'.format(
 exception=e, country=country_code, lang=lang))
 
 f = open(os.path.join(
-get_sql_dir(), 'create_table_{}.sql'.format(table)), 'w')
+get_sql_dir(), u'create_table_{}.sql'.format(table)), 'w')
 f.write(sql)
 f.close()
 
@@ -70,7 +70,7 @@
 typ += ' NOT NULL DEFAULT 0'
 elif typ.startswith("varchar("):
 if field.get('default'):
-typ += " NOT NULL DEFAULT '{}'".format(
+typ += u" NOT NULL DEFAULT '{}'".format(
 field.get('default'))
 else:
 typ += " NOT NULL DEFAULT ''"

-- 
To view, visit https://gerrit.wikimedia.org/r/392660
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I0242e1ffd3cb158e09c62260a6ab21688168807e
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: [WIP]Activate general query log in dev mode

2017-11-21 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/392652 )

Change subject: [WIP]Activate general query log in dev mode
..

[WIP]Activate general query log in dev mode

Hopefully useful for debugging differences in what scripts think
they are sending and what the db thinks it is receiving.

WIP because:
* Settings are loaded but no log files seem to be produced =(

Bug: T174503
Change-Id: I727f65b9db76f2d66ae6b320fdab79a0bd7dc6da
---
M docker-compose-base.yml
A mysql/log_query.cnf
2 files changed, 6 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/52/392652/1

diff --git a/docker-compose-base.yml b/docker-compose-base.yml
index ce2aee2..53c12ba 100644
--- a/docker-compose-base.yml
+++ b/docker-compose-base.yml
@@ -20,6 +20,7 @@
   volumes:
 - ./mysql:/etc/mysql/conf.d
 - ./erfgoedbot/sql/:/docker-entrypoint-initdb.d/
+- ./docker_sql_logs:/var/log/mysql
   environment:
 MYSQL_ROOT_PASSWORD: root_password
 MYSQL_DATABASE: s51138__heritage_p
diff --git a/mysql/log_query.cnf b/mysql/log_query.cnf
new file mode 100644
index 000..4f4d7fb
--- /dev/null
+++ b/mysql/log_query.cnf
@@ -0,0 +1,5 @@
+[mysqld]
+general-log=1
+general-log-file=/var/log/mysql/query.log
+log-output=file
+

-- 
To view, visit https://gerrit.wikimedia.org/r/392652
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I727f65b9db76f2d66ae6b320fdab79a0bd7dc6da
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: [WIP] Fix bad idx entries

2017-11-13 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/391139 )

Change subject: [WIP] Fix bad idx entries
..

[WIP] Fix bad idx entries

The idx table is an ugly hack which concatenates four values only to
later explode it. But if the strings are too long for the field,
parts are left out and, as a result, exploding the value returns
the wrong number of fields.

WIP because:
* Something else is editing these values meaning the last two values
are lost before they reach makeIdx.

Bug: T174503
Change-Id: I42f6b0d97f52e99ac79a3960c21e91c1721b600c
---
M README.md
M api/includes/Statistics.php
2 files changed, 9 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/39/391139/1

diff --git a/README.md b/README.md
index 4a30350..a39f105 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,9 @@
 
 # Update the monuments_all table
 docker-compose run --rm db mysql -h db s51138__heritage_p --user=heritage 
--password=password < erfgoedbot/sql/fill_table_monuments_all.sql
+
+# Update the statistics table
+docker-compose run --rm web php ../maintenance/_buildStats.php
 ```
 
 The web interface will be accessible on http://localhost:8000/
diff --git a/api/includes/Statistics.php b/api/includes/Statistics.php
index 859e5c0..e08f996 100644
--- a/api/includes/Statistics.php
+++ b/api/includes/Statistics.php
@@ -15,6 +15,9 @@
var $axis = [];
static $fieldPrefix = 'st_';
 
+   // must be at least 25 less than max length for idx field
+   static $maxMuniLength = 70;
+
var $lastDay = '';
static $aItems = [
'address', 'address_pct', 'coordinates', 'coordinates_pct', 
'image',
@@ -100,6 +103,9 @@
static function makeIdx( $row ) {
// Need to replace any naturally occuring ':' in row[1]
$muni = str_replace( ':', '&#58;', $row[1] );
+   if ( strlen( $muni ) >= Statistics::$maxMuniLength ) {
+   $muni = substr( $muni, 0, Statistics::$maxMuniLength - 
1 ) . '…';
+   }
return $row[0] . ':' . $muni . ':' . $row[2] . ':' . $row[3];
}
 

-- 
To view, visit https://gerrit.wikimedia.org/r/391139
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I42f6b0d97f52e99ac79a3960c21e91c1721b600c
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] operations...dcat[master]: [WIP]Support prefixed dump types

2017-11-09 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/390312 )

Change subject: [WIP]Support prefixed dump types
..

[WIP]Support prefixed dump types

Adds a general support for prefixed dump formats and an explicit
support for the "-BETA" prefixed .ttl dump.

Bug: T163328
Change-Id: I7dd9a71c75ff9eb4a5efbb47bd855c3979fd0532
---
M DCAT.php
M README.md
M config.example.json
3 files changed, 16 insertions(+), 6 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps/dcat 
refs/changes/12/390312/1

diff --git a/DCAT.php b/DCAT.php
index ce7e6e2..354b824 100644
--- a/DCAT.php
+++ b/DCAT.php
@@ -277,7 +277,7 @@
);
}
 
-   $xml->writeElementNS( 'dcterms', 'format', null, 
$mediatype );
+   $xml->writeElementNS( 'dcterms', 'format', null, 
$mediatype['contentType'] );
 
// add description in each language
writeDistributionI18n( $xml, $data, $prefix, $format,
@@ -612,7 +612,11 @@
$testStrings = array();
foreach ( $data['config']['dump-info']['compression'] as $compression ) 
{
foreach ( $data['config']['dump-info']['mediatype'] as $format 
=> $mediatype ) {
-   $testStrings["$format$compression"] = '-all.' . $format 
. '.' . $compression;
+   $prefix = '';
+   if ( array_key_exists( 'prefix', $mediatype ) ) {
+   $prefix = $mediatype['prefix'];
+   }
+   $testStrings["$format$compression"] = '-all' . $prefix 
. '.' . $format . '.' . $compression;
}
}
 
diff --git a/README.md b/README.md
index 76fb017..1faef5c 100644
--- a/README.md
+++ b/README.md
@@ -102,8 +102,9 @@
 *   `accessURL`: URL to the directory where the *.json.gz* files
 reside (`$1` is replaced on the fly by the actual filename),
 e.g. *http://example.org/dumps/$1*
-*   `mediatype`: (`object`) List of media types. e.g.
-`{"json": "application/json"}`
+*   `mediatype`: (`object`) List of media types and prefixes. e.g.
+`"json": {"contentType": "application/json"}` or
+`"ttl": {"contentType": "text/turtle", "prefix": "-BETA"}`
 *   `compression`: (`object`) List of compression formats, in the
 format *name:file-ending* e.g. `{"gzip": "gz"}`
 *   `license`: See ld-info:license above
diff --git a/config.example.json b/config.example.json
index bc1e6f9..9535441 100644
--- a/config.example.json
+++ b/config.example.json
@@ -42,8 +42,13 @@
 "dump-info": {
 "accessURL": "https://dumps.wikimedia.org/wikidatawiki/entities/$1";,
 "mediatype": {
-"json": "application/json",
-"ttl": "text/turtle"
+"json": {
+"contentType": "application/json"
+},
+"ttl":  {
+"contentType": "text/turtle",
+"prefix": "-BETA"
+}
 },
 "compression": {
 "gzip": "gz",

-- 
To view, visit https://gerrit.wikimedia.org/r/390312
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7dd9a71c75ff9eb4a5efbb47bd855c3979fd0532
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps/dcat
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: Make statistics table for unknown fields

2017-11-08 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/390169 )

Change subject: Make statistics table for unknown fields
..

Make statistics table for unknown fields

Also add header to each report page.

Change-Id: I89e5e2e4adcbfe31139fb111a436c6240d0a6c62
---
M erfgoedbot/update_database.py
M tests/test_update_database.py
2 files changed, 580 insertions(+), 19 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/69/390169/1

diff --git a/erfgoedbot/update_database.py b/erfgoedbot/update_database.py
index fc0fceb..2086046 100755
--- a/erfgoedbot/update_database.py
+++ b/erfgoedbot/update_database.py
@@ -140,30 +140,59 @@
 countryconfig.get('table'), field.get('conv')))
 
 
-def unknownFieldsStatistics(countryconfig, unknownFields):
+def unknownFieldsStatistics(countryconfig, unknown_fields):
 """
 Outputs a list of any unknown fields as a wikitext table.
 
 The table contains the name and frequency of the field and a sample of
 source pages where this field was encountered.
+
+@param countryconfig: the configurations for the dataset being processed.
+@param unknown_fields: dict of discovered fields with each value being a
+Counter for how frequently the field is encountered per page.
+@return: dict summarising the usages
 """
 site = pywikibot.Site(u'commons', u'commons')
 page = pywikibot.Page(
 site, u'Commons:Monuments database/Unknown fields/{0}'.format(
 countryconfig.get('table')))
-summary = u'Updating the list of unknown fields'
+summary = u'Updating the list of unknown fields with {0} entries'
 
-text = u'{| class="wikitable sortable"\n'
-text += u'! Field !! Count !! Sources\n'
-for key, counter in unknownFields.items():
-text += u'|-\n'
-text += u'| {0} || {1} || {2}\n'.format(
-key, sum(counter.values()), format_source_field(counter, site))
+text = (
+u'{{#ifexist:{{FULLPAGENAME}}/header'
+u'|{{/header}}'
+u'|For information on how to use this report and how to localise '
+u'these instructions visit '
+u'[[:c:Commons:Monuments_database/Unknown fields]]. }}\n')
 
-text += u'|}\n'
+total_usages = 0
+pages_with_fields = set()
+
+if not unknown_fields:
+text += u'\nThere are no unknown fields left. Great work!\n'
+else:
+text += u'{| class="wikitable sortable"\n'
+text += u'! Field !! Count !! Sources\n'
+for key, counter in unknown_fields.iteritems():
+total_usages += sum(counter.values())
+pages_with_fields.update(counter.keys())
+text += u'|-\n'
+text += u'| {0} || {1} || {2}\n'.format(
+key, sum(counter.values()), format_source_field(counter, site))
+text += u'|}\n'
+
 text += u'[[Category:Commons:Monuments database/Unknown fields]]'
 
-common.save_to_wiki_or_local(page, summary, text)
+common.save_to_wiki_or_local(
+page, summary.format(len(unknown_fields)), text)
+
+return {
+'report_page': page,
+'config': countryconfig,
+'total_fields': len(unknown_fields),
+'total_pages': len(pages_with_fields),
+'total_usages': total_usages
+}
 
 
 def format_source_field(sources, site, sample_size=4):
@@ -445,7 +474,8 @@
 if countryconfig.get('type') == 'sparql':
 process_country_wikidata(countryconfig, conn, cursor)
 else:
-process_country_list(countryconfig, conn, cursor, fullUpdate, daysBack)
+return process_country_list(
+countryconfig, conn, cursor, fullUpdate, daysBack)
 
 
 def process_country_list(countryconfig, conn, cursor, fullUpdate, daysBack):
@@ -484,7 +514,7 @@
 page, page.permalink(percent_encoded=False), countryconfig,
 conn, cursor, unknownFields=unknownFields)
 
-unknownFieldsStatistics(countryconfig, unknownFields)
+return unknownFieldsStatistics(countryconfig, unknownFields)
 
 
 def load_wikidata_template_sparql():
@@ -522,6 +552,102 @@
 
 for resultitem in query_result:
 process_monument_wikidata(resultitem, countryconfig, conn, cursor)
+
+
+def make_statistics(statistics):
+"""Output the overall results for unknown fields as a nice wikitable."""
+site = pywikibot.Site('commons', 'commons')
+page = pywikibot.Page(
+site, u'Commons:Monuments database/Unknown fields/Statistics')
+
+text = (
+u'{| class="wikitable sortable"\n'
+u'! country '
+u'!! lang '
+u'!! data-sort-type="number"|Total unknown fields '
+u'!! data-sort-type="number"|Total usage of unknown fields '
+u'!! data-sort-type="number"|Total pages containing unknown fields '
+u'!! Report page '
+u'!! Row template '
+u'!! Header template 

[MediaWiki-commits] [Gerrit] labs...heritage[master]: Stop categorization job during database update

2017-11-08 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/390142 )

Change subject: Stop categorization job during database update
..

Stop categorization job during database update

This commits changes which exist on labs today.

Change-Id: I26784cd24b5ccd52202ca45f02f15ce4bbbfe990
---
M bin/categorize_images.sh
M bin/update_monuments.sh
2 files changed, 9 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/42/390142/1

diff --git a/bin/categorize_images.sh b/bin/categorize_images.sh
index 342b437..884f3d1 100755
--- a/bin/categorize_images.sh
+++ b/bin/categorize_images.sh
@@ -5,6 +5,9 @@
 CURRENT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 . $CURRENT_DIR/defaults.sh
 
+# Use a virtual environment with our requirements
+source $VIRTUAL_ENV_PATH/bin/activate
+
 # Make sure we are in our homedir
 cd $HOME_DIR || exit
 
diff --git a/bin/update_monuments.sh b/bin/update_monuments.sh
index 2d96a9c..3a4af50 100755
--- a/bin/update_monuments.sh
+++ b/bin/update_monuments.sh
@@ -28,10 +28,16 @@
 echo_time "Full source database update..."
 $PYWIKIBOT_BIN $ERFGOED_PATH/update_database.py -fullupdate -log -skip_wd
 
+# stop categorization job as next stage locks the database
+jstop categorize_images
+
 # Update the all monuments table
 echo_time "Update monuments_all table..."
 $MYSQL_BIN -h $DB_SERVER $DATABASE < 
$ERFGOED_PATH/sql/fill_table_monuments_all.sql
 
+# restart the categorization job
+jsub -l release=trusty -mem 1000m -once -j y -o 
/data/project/heritage/logs/categorize_images.log -N categorize_images 
/data/project/heritage/bin/categorize_images.sh >> 
/data/project/heritage/logs/crontab.log
+
 ## Update the image table. Is now another job
 # echo_time "Update image table..."
 # PYWIKIBOT_BIN $ERFGOED_PATH/populate_image_table.py

-- 
To view, visit https://gerrit.wikimedia.org/r/390142
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I26784cd24b5ccd52202ca45f02f15ce4bbbfe990
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: Allow non-ascii template names

2017-11-08 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/390064 )

Change subject: Allow non-ascii template names
..

Allow non-ascii template names

Bug: T180068
Change-Id: I0d20e0071642df8f1dbd4c1edc73800aae945525
---
M erfgoedbot/unused_monument_images.py
1 file changed, 2 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/64/390064/1

diff --git a/erfgoedbot/unused_monument_images.py 
b/erfgoedbot/unused_monument_images.py
index 78ab903..6dbfa20 100644
--- a/erfgoedbot/unused_monument_images.py
+++ b/erfgoedbot/unused_monument_images.py
@@ -306,12 +306,12 @@
 countryconfig.get('project', u'wikipedia'))
 row_template_page = pywikibot.Page(
 row_site,
-'Template:{0}'.format(countryconfig.get('rowTemplate')))
+u'Template:{0}'.format(countryconfig.get('rowTemplate')))
 row_template = row_template_page.title(
 asLink=True, withNamespace=False, insite=site)
 
 if countryconfig.get('commonsTemplate'):
-commons_template = '{{tl|%s}}' % (
+commons_template = u'{{tl|%s}}' % (
 countryconfig.get('commonsTemplate'), )
 
 if row.get('report_page'):

-- 
To view, visit https://gerrit.wikimedia.org/r/390064
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I0d20e0071642df8f1dbd4c1edc73800aae945525
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: Add WLM favicon

2017-10-19 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/385151 )

Change subject: Add WLM favicon
..

Add WLM favicon

The favicon was already referred to in some files but was not
part of the repo.

Adding this to the root html folder makes most browsers use it
automatically.

Change-Id: Ic73cc30cebfeccfc1170514fa7ae3b76c28cd937
---
A html/favicon.ico
M html/index.html
2 files changed, 3 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/51/385151/1

diff --git a/html/favicon.ico b/html/favicon.ico
new file mode 100644
index 000..9564a85
--- /dev/null
+++ b/html/favicon.ico
Binary files differ
diff --git a/html/index.html b/html/index.html
index 6be98c7..14503d7 100644
--- a/html/index.html
+++ b/html/index.html
@@ -6,6 +6,8 @@
 
 Heritage - tools for Wiki Loves Monuments
 
+
+
 

[MediaWiki-commits] [Gerrit] labs...heritage[master]: Drop non-url registrant_url

2017-09-29 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/381441 )

Change subject: Drop non-url registrant_url
..

Drop non-url registrant_url

Change-Id: I240e32395e7e3f0f172954e0c2f04e4cd825b5a1
---
M erfgoedbot/monuments_config/sv_es.json
1 file changed, 1 insertion(+), 9 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/41/381441/1

diff --git a/erfgoedbot/monuments_config/sv_es.json 
b/erfgoedbot/monuments_config/sv_es.json
index c935fca..4c38a27 100644
--- a/erfgoedbot/monuments_config/sv_es.json
+++ b/erfgoedbot/monuments_config/sv_es.json
@@ -79,10 +79,6 @@
 {
 "dest": "commonscat",
 "source": "monumento_categoría"
-},
-{
-"dest": "registrant_url",
-"source": "id"
 }
 ],
 "sql_lang": "Spanish",
@@ -151,10 +147,6 @@
 "monument_article": {
 "value": "monumento_enlace",
 "type": "Field"
-},
-"registrant_url": {
-"value": "registrant_url",
-"type": "Field"
 }
 }
-}
\ No newline at end of file
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/381441
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I240e32395e7e3f0f172954e0c2f04e4cd825b5a1
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: Correct monument_article matching in th_th

2017-09-29 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/381405 )

Change subject: Correct monument_article matching in th_th
..

Correct monument_article matching in th_th

Also remove non-url registrant_url

Bug: T176712
Change-Id: If689239233b448a4af23f2e384b07111e440326b
---
M erfgoedbot/monuments_config/th_th.json
1 file changed, 2 insertions(+), 10 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/05/381405/1

diff --git a/erfgoedbot/monuments_config/th_th.json 
b/erfgoedbot/monuments_config/th_th.json
index 9fb0eb3..470b3ab 100644
--- a/erfgoedbot/monuments_config/th_th.json
+++ b/erfgoedbot/monuments_config/th_th.json
@@ -82,12 +82,8 @@
 },
 {
 "dest": "monument_article",
-"source": "name",
+"source": "ชื่อ",
 "conv": "extractWikilink"
-},
-{
-"dest": "registrant_url",
-"source": "register"
 }
 ],
 "sql_lang": "Thai",
@@ -160,10 +156,6 @@
 "monument_article": {
 "value": "monument_article",
 "type": "Field"
-},
-"registrant_url": {
-"value": "registrant_url",
-"type": "Field"
 }
 }
-}
\ No newline at end of file
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/381405
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If689239233b448a4af23f2e384b07111e440326b
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: Add unmapped field for al_sq

2017-09-28 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/381364 )

Change subject: Add unmapped field for al_sq
..

Add unmapped field for al_sq

Also drop invalid registrant_url field

Change-Id: I5156279973e79d3011e53f3b3980a40a9de0cebf
Task: T176118
---
M erfgoedbot/monuments_config/al_sq.json
1 file changed, 17 insertions(+), 5 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/64/381364/1

diff --git a/erfgoedbot/monuments_config/al_sq.json 
b/erfgoedbot/monuments_config/al_sq.json
index 2c51aa0..a9367da 100644
--- a/erfgoedbot/monuments_config/al_sq.json
+++ b/erfgoedbot/monuments_config/al_sq.json
@@ -26,6 +26,22 @@
 "source": "Nr"
 },
 {
+"dest": "category",
+"source": "Kategoria"
+},
+{
+"dest": "field",
+"source": "Fusha"
+},
+{
+"dest": "type",
+"source": "Tipologjia"
+},
+{
+"dest": "county",
+"source": "Qarku"
+},
+{
 "dest": "place",
 "source": "Lokacioni"
 },
@@ -51,10 +67,6 @@
 "dest": "monument_article",
 "source": "Emërtimi",
 "conv": "extractWikilink"
-},
-{
-"dest": "registrant_url",
-"source": ""
 }
 ],
 "sql_lang": "Albanian",
@@ -113,4 +125,4 @@
 "type": "Field"
 }
 }
-}
\ No newline at end of file
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/381364
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I5156279973e79d3011e53f3b3980a40a9de0cebf
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: Fix incorrect mapping in am_hy

2017-09-28 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/381361 )

Change subject: Fix incorrect mapping in am_hy
..

Fix incorrect mapping in am_hy

Bug: T176991
Change-Id: If93d8387b3da6b71d33521cb7e737ead498ba15b
---
M erfgoedbot/monuments_config/am_hy.json
1 file changed, 1 insertion(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/61/381361/1

diff --git a/erfgoedbot/monuments_config/am_hy.json 
b/erfgoedbot/monuments_config/am_hy.json
index 7019742..4b0aa17 100644
--- a/erfgoedbot/monuments_config/am_hy.json
+++ b/erfgoedbot/monuments_config/am_hy.json
@@ -83,7 +83,7 @@
 },
 {
 "dest": "registrant_url",
-"source": "gov_doc_id",
+"source": "հղում",
 "conv": "generateRegistrantUrl"
 }
 ],

-- 
To view, visit https://gerrit.wikimedia.org/r/381361
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If93d8387b3da6b71d33521cb7e737ead498ba15b
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: Add unmapped field for am_hy

2017-09-28 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/381215 )

Change subject: Add unmapped field for am_hy
..

Add unmapped field for am_hy

Change-Id: Icb945f9492c64175b62c9e3c3710d160aae1992e
---
M erfgoedbot/monuments_config/am_hy.json
1 file changed, 5 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/15/381215/1

diff --git a/erfgoedbot/monuments_config/am_hy.json 
b/erfgoedbot/monuments_config/am_hy.json
index c029d63..7019742 100644
--- a/erfgoedbot/monuments_config/am_hy.json
+++ b/erfgoedbot/monuments_config/am_hy.json
@@ -62,6 +62,10 @@
 "check": "checkLon"
 },
 {
+"dest": "height",
+"source": "բարձրություն"
+},
+{
 "dest": "gov_doc_id",
 "source": "հղում"
 },
@@ -155,4 +159,4 @@
 "type": "Field"
 }
 }
-}
\ No newline at end of file
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/381215
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Icb945f9492c64175b62c9e3c3710d160aae1992e
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: Harvest coordinate template

2017-09-27 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/380939 )

Change subject: Harvest coordinate template
..

Harvest coordinate template

This will harvest the coord parameter containing a {{coord|lat|lon}}
template. Note that it will not (yet) parse this in order to populate
the lat, lon values

Bug: T176845
Change-Id: I19802be1ce59f760c3ab267e6b0d9d39ce5b01fb
---
M erfgoedbot/monuments_config/ph_en.json
1 file changed, 5 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/39/380939/1

diff --git a/erfgoedbot/monuments_config/ph_en.json 
b/erfgoedbot/monuments_config/ph_en.json
index e7a555e..a7c7cf7 100644
--- a/erfgoedbot/monuments_config/ph_en.json
+++ b/erfgoedbot/monuments_config/ph_en.json
@@ -64,6 +64,10 @@
 "check": "checkLon"
 },
 {
+"dest": "coord_template",
+"source": "coord"
+},
+{
 "dest": "image",
 "source": "image"
 },
@@ -148,4 +152,4 @@
 "type": "Raw"
 }
 }
-}
\ No newline at end of file
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/380939
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I19802be1ce59f760c3ab267e6b0d9d39ce5b01fb
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: Remove non-url registrant_url

2017-09-26 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/380769 )

Change subject: Remove non-url registrant_url
..

Remove non-url registrant_url

Change-Id: Id70c944f0ca4bc2bd2a26cf0e0e0a728cbc312e5
---
M erfgoedbot/monuments_config/tn_fr.json
1 file changed, 1 insertion(+), 9 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/69/380769/1

diff --git a/erfgoedbot/monuments_config/tn_fr.json 
b/erfgoedbot/monuments_config/tn_fr.json
index fc77bee..31bedf1 100644
--- a/erfgoedbot/monuments_config/tn_fr.json
+++ b/erfgoedbot/monuments_config/tn_fr.json
@@ -75,10 +75,6 @@
 "dest": "monument_article",
 "source": "monument",
 "conv": "extractWikilink"
-},
-{
-"dest": "registrant_url",
-"source": "id"
 }
 ],
 "sql_lang": "French",
@@ -147,10 +143,6 @@
 "monument_article": {
 "value": "monument_article",
 "type": "Field"
-},
-"registrant_url": {
-"value": "registrant_url",
-"type": "Field"
 }
 }
-}
\ No newline at end of file
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/380769
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id70c944f0ca4bc2bd2a26cf0e0e0a728cbc312e5
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: Fix misspelt parameter in lu_lb

2017-09-26 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/380708 )

Change subject: Fix misspelt parameter in lu_lb
..

Fix misspelt parameter in lu_lb

Bug: T174556
Change-Id: Ieb47b9627908b588bc910f15921589d72b684ece
---
M erfgoedbot/monuments_config/lu_lb.json
1 file changed, 2 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/08/380708/1

diff --git a/erfgoedbot/monuments_config/lu_lb.json 
b/erfgoedbot/monuments_config/lu_lb.json
index 7d13004..184ebad 100644
--- a/erfgoedbot/monuments_config/lu_lb.json
+++ b/erfgoedbot/monuments_config/lu_lb.json
@@ -44,7 +44,7 @@
 },
 {
 "dest": "klasseiert_zenter",
-"source": "klasséiert_zënter"
+"source": "klasséiert_zanter"
 },
 {
 "dest": "lat",
@@ -138,4 +138,4 @@
 "type": "Field"
 }
 }
-}
\ No newline at end of file
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/380708
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ieb47b9627908b588bc910f15921589d72b684ece
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: [WIP]Restructure missing_commonscat_links Statistics

2017-09-23 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/380060 )

Change subject: [WIP]Restructure missing_commonscat_links Statistics
..

[WIP]Restructure missing_commonscat_links Statistics

This undoes https://gerrit.wikimedia.org/r/#/c/379971 for
missing_commonscat_links as we no longer loop over all configs.

WIP:
* Need to make sure I didn't break anything
* Consider integrating https://gerrit.wikimedia.org/r/#/c/379974/

Change-Id: I0f8d30fabeadfc0dc1e6b18bcc7d36bd7708fb09
---
M erfgoedbot/missing_commonscat_links.py
1 file changed, 99 insertions(+), 34 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/60/380060/1

diff --git a/erfgoedbot/missing_commonscat_links.py 
b/erfgoedbot/missing_commonscat_links.py
index 3f9594a..7466fec 100644
--- a/erfgoedbot/missing_commonscat_links.py
+++ b/erfgoedbot/missing_commonscat_links.py
@@ -33,17 +33,32 @@
 '''
 if not countryconfig.get('missingCommonscatPage'):
 # missingCommonscatPage not set, just skip silently.
-return False
+return {
+'code': countrycode,
+'lang': lang,
+'config': countryconfig,
+'cmt': 'skipped: no missingCommonscatPage'
+}
 
 if countryconfig.get('type') == 'sparql':
 # This script does not (yet) work for SPARQL sources, skip silently
-return False
+return {
+'code': countrycode,
+'lang': lang,
+'config': countryconfig,
+'cmt': 'skipped: cannot handle sparql'
+}
 
 commonscatField = lookupSourceField(u'commonscat', countryconfig)
 if not commonscatField:
 # Field is missing. Something is seriously wrong, but we just skip it
 # silently
-return False
+return {
+'code': countrycode,
+'lang': lang,
+'config': countryconfig,
+'cmt': 'skipped: no template field matched to commonscat!!'
+}
 
 missingCommonscatPage = countryconfig.get('missingCommonscatPage')
 commonsTrackerCategory = countryconfig.get(
@@ -111,7 +126,13 @@
 pywikibot.debug(text, _logger)
 common.save_to_wiki_or_local(page, comment, text)
 
-return totalCategories
+return {
+'code': countrycode,
+'lang': lang,
+'report_page': page,
+'config': countryconfig,
+'total_cats': totalCategories
+}
 
 
 def lookupSourceField(destination, countryconfig):
@@ -172,36 +193,78 @@
 return result
 
 
-def makeStatistics(mconfig, totals):
-text = u'{| class="wikitable sortable"\n'
-text += \
-u'! country !! lang !! total !! page !! row template !! Commons 
template\n'
-
-totalCategories = 0
-for ((countrycode, lang), countryconfig) in 
sorted(mconfig.countries.items()):
-if countryconfig.get('skip'):
-continue
-if countryconfig.get('missingCommonscatPage') and 
countryconfig.get('commonsTemplate'):
-text += u'|-\n'
-text += u'| %s ' % countrycode
-text += u'|| %s ' % lang
-text += u'|| %s ' % totals.get((countrycode, lang))
-totalCategories += totals.get((countrycode, lang))
-text += u'|| [[:%s:%s|%s]] ' % (lang, countryconfig.get(
-'missingCommonscatPage'), 
countryconfig.get('missingCommonscatPage'))
-text += u'|| [[:%s:Template:%s|%s]] ' % (
-lang, countryconfig.get('rowTemplate'), 
countryconfig.get('rowTemplate'))
-text += \
-u'|| {{tl|%s}}\n' % countryconfig.get('commonsTemplate')
-text += u'|- class="sortbottom"\n'
-text += u'| || || %s \n' % totalCategories
-text += u'|}\n'
-
+def makeStatistics(statistics):
+"""Output the overall results of the bot as a nice wikitable."""
 site = pywikibot.Site('commons', 'commons')
 page = pywikibot.Page(
 site, u'Commons:Monuments database/Missing commonscat 
links/Statistics')
 
-comment = u'Updating missing commonscat links statistics. Total missing 
links: %s' % totalCategories
+text = (
+u'{| class="wikitable sortable"\n'
+u'! country '
+u'!! lang '
+u'!! total '
+u'!! page '
+u'!! row template '
+u'!! Commons template'
+u'\n')
+
+text_row = (
+u'|-\n'
+u'|| {code} \n'
+u'|| {lang} \n'
+u'|| {total_cats} \n'
+u'|| {report_page} \n'
+u'|| {row_template} \n'
+u'|| {commons_template} \n')
+
+total_cats_sum = 0
+for row in statistics:
+countryconfig = row.get('config')
+total_cats_or_cmt = row.get('total_cats')
+row_template = u'---'
+commons_template = u'---'
+report_page = u'---'
+
+if row.get('total_cats') is not None:
+total_cats_sum += row.get('total_cat

[MediaWiki-commits] [Gerrit] labs...heritage[master]: Add default instructions to top of unused images reports

2017-09-23 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/380058 )

Change subject: Add default instructions to top of unused images reports
..

Add default instructions to top of unused images reports

Bug: T176200
Change-Id: I048bbb13761ea41996d252efff68b68036698dfd
---
M erfgoedbot/unused_monument_images.py
M tests/test_unused_monument_images.py
2 files changed, 15 insertions(+), 7 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/58/380058/1

diff --git a/erfgoedbot/unused_monument_images.py 
b/erfgoedbot/unused_monument_images.py
index f362583..3b2aa14 100644
--- a/erfgoedbot/unused_monument_images.py
+++ b/erfgoedbot/unused_monument_images.py
@@ -133,7 +133,12 @@
 order to ensure all candidates for a given monument id are presented.
 """
 # People can add a /header template for with more info
-text = u'{{#ifexist:{{FULLPAGENAME}}/header | {{/header}} }}\n'
+text = (
+u'{{#ifexist:{{FULLPAGENAME}}/header'
+u'|{{/header}}'
+u'|For information on how to use this report and how to localised '
+u'these instructions visit '
+u'[[:c:Commons:Monuments database/Unused images]]. }}\n')
 total_pages = 0
 total_ids = 0
 totalImages = 0
diff --git a/tests/test_unused_monument_images.py 
b/tests/test_unused_monument_images.py
index 6e47c7a..60218ed 100644
--- a/tests/test_unused_monument_images.py
+++ b/tests/test_unused_monument_images.py
@@ -479,6 +479,12 @@
 self.mock_report_page = mock.create_autospec(
 unused_monument_images.pywikibot.Page,
 )
+self.prefix = (
+u'{{#ifexist:{{FULLPAGENAME}}/header'
+u'|{{/header}}'
+u'|For information on how to use this report and how to localised '
+u'these instructions visit '
+u'[[:c:Commons:Monuments database/Unused images]]. }}\n')
 
 self.unused_images = OrderedDict()
 self.unused_images['source_link_1'] = OrderedDict()
@@ -497,8 +503,7 @@
 
 def test_output_country_report_complete(self):
 expected_cmt = u'Images to be used in monument lists: 5'
-expected_output = (
-u'{{#ifexist:{{FULLPAGENAME}}/header | {{/header}} }}\n'
+expected_output = self.prefix + (
 u'=== source_link_1 ===\n'
 u'\n'
 u'File:filename1_11.jpg|id_11\n'
@@ -532,8 +537,7 @@
 expected_cmt = (
 u'Images to be used in monument lists: 2 (gallery maximum '
 u'reached), total of unused images: 5')
-expected_output = (
-u'{{#ifexist:{{FULLPAGENAME}}/header | {{/header}} }}\n'
+expected_output = self.prefix + (
 u'=== source_link_1 ===\n'
 u'\n'
 u'File:filename1_11.jpg|id_11\n'
@@ -563,8 +567,7 @@
 expected_cmt = (
 u'Images to be used in monument lists: 3 (gallery maximum '
 u'reached), total of unused images: 5')
-expected_output = (
-u'{{#ifexist:{{FULLPAGENAME}}/header | {{/header}} }}\n'
+expected_output = self.prefix + (
 u'=== source_link_1 ===\n'
 u'\n'
 u'File:filename1_11.jpg|id_11\n'

-- 
To view, visit https://gerrit.wikimedia.org/r/380058
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I048bbb13761ea41996d252efff68b68036698dfd
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: [WIP]Group unused images per source page

2017-09-19 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/379141 )

Change subject: [WIP]Group unused images per source page
..

[WIP]Group unused images per source page

Also ensures that all candidates for a given monument id are always
displayed even if this brings us over the max threshold.

WIP because:
* needs testing
* look over if more info can be sent to stats
* possibly integrate with T176200 update

Bug: T117327
Change-Id: Ifd1915eb7ce810d1ecfa0d6ce98007726593d1eb
---
M erfgoedbot/unused_monument_images.py
1 file changed, 56 insertions(+), 33 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/41/379141/1

diff --git a/erfgoedbot/unused_monument_images.py 
b/erfgoedbot/unused_monument_images.py
index 07676d5..5f60e80 100644
--- a/erfgoedbot/unused_monument_images.py
+++ b/erfgoedbot/unused_monument_images.py
@@ -24,30 +24,9 @@
 _logger = "unused_images"
 
 
-def processCountry(countrycode, lang, countryconfig, conn, cursor, conn2, 
cursor2, local_write):
-'''
-Work on a single country.
-'''
-if not countryconfig.get('unusedImagesPage'):
-# unusedImagesPage not set, just skip silently.
-return False
-
-unusedImagesPage = countryconfig.get('unusedImagesPage')
-project = countryconfig.get('project', u'wikipedia')
-commonsTrackerCategory = countryconfig.get(
-'commonsTrackerCategory').replace(u' ', u'_')
-
-withoutPhoto = getMonumentsWithoutPhoto(countrycode, lang, conn, cursor)
-photos = getMonumentPhotos(commonsTrackerCategory, conn2, cursor2)
-
-pywikibot.log(u'withoutPhoto %s elements' % (len(withoutPhoto),))
-pywikibot.log(u'photos %s elements' % (len(photos),))
-
-# People can add a /header template for with more info
-text = u'{{#ifexist:{{FULLPAGENAME}}/header | {{/header}} }}\n'
-text += u'\n'
-totalImages = 0
-maxImages = 1000
+def group_unused_images_by_source(photos, withoutPhoto, countryconfig):
+"""Identify all unused images and group them by source page and id."""
+unused_images = {}
 
 for catSortKey in sorted(photos.keys()):
 try:
@@ -74,24 +53,68 @@
 try:
 source_link = common.get_source_link(
 withoutPhoto.get(monumentId),
-countryconfig.get('type'),
-monumentId)
+countryconfig.get('type'))
+if source_link not in unused_images:
+unused_images[source_link] = {}
 except ValueError:
 pywikibot.warning(
 u'Could not find wikiSourceList for %s (%s)' % (
 monumentId, withoutPhoto.get(monumentId)))
 continue
 imageName = photos.get(catSortKey)
-# pywikibot.output(u'Key %s returned a result' % (monumentId,))
-# pywikibot.output(imageName)
-if totalImages <= maxImages:
-text += u'File:{0}|{1}\n'.format(
-unicode(imageName, 'utf-8'), source_link)
-totalImages += 1
+
+if monumentId not in unused_images[source_link]:
+unused_images[source_link][monumentId] = []
+
+unused_images[source_link][monumentId].append(imageName)
 except ValueError:
 pywikibot.warning(u'Got value error for %s' % (monumentId,))
 
-text += u''
+return unused_images
+
+
+#TODO send more stats back to main
+def processCountry(countrycode, lang, countryconfig, conn, cursor, conn2, 
cursor2, local_write):
+'''
+Work on a single country.
+'''
+if not countryconfig.get('unusedImagesPage'):
+# unusedImagesPage not set, just skip silently.
+return False
+
+unusedImagesPage = countryconfig.get('unusedImagesPage')
+project = countryconfig.get('project', u'wikipedia')
+commonsTrackerCategory = countryconfig.get(
+'commonsTrackerCategory').replace(u' ', u'_')
+
+withoutPhoto = getMonumentsWithoutPhoto(countrycode, lang, conn, cursor)
+photos = getMonumentPhotos(commonsTrackerCategory, conn2, cursor2)
+
+pywikibot.log(u'withoutPhoto %s elements' % (len(withoutPhoto),))
+pywikibot.log(u'photos %s elements' % (len(photos),))
+
+# People can add a /header template for with more info
+text = u'{{#ifexist:{{FULLPAGENAME}}/header | {{/header}} }}\n'
+totalImages = 0
+maxImages = 1000
+
+unused_images = group_unused_images_by_source(
+photos, withoutPhoto, countryconfig)
+
+for source_page, value in unused_images.iteritems():
+if totalImages <= maxImages:
+text += u'=== {0} ===\n'.format(source_page)
+text += u'\n'
+for monument_id, candidates in value.iteritems():
+  

[MediaWiki-commits] [Gerrit] labs...heritage[master]: Make all erfgoedbot scripts respect the skipping mechanisms.

2017-09-19 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/379140 )

Change subject: Make all erfgoedbot scripts respect the skipping mechanisms.
..

Make all erfgoedbot scripts respect the skipping mechanisms.

The skipping mechanisms (introduced through the merging of the
Wikidata branch) consist of the `skip` field in the config and
the `-skip_wd` command line argument.

Also: set nl-wd_(nl) to skip

Change-Id: I68f345cf09e555e2ea37c4db0197ccc11954abc1
---
M erfgoedbot/add_coord_to_articles.py
M erfgoedbot/add_object_location_monuments.py
M erfgoedbot/categorize_images.py
M erfgoedbot/images_of_monuments_without_id.py
M erfgoedbot/missing_commonscat_links.py
M erfgoedbot/monuments_config/nl-wd_nl.json
M erfgoedbot/populate_image_table.py
M erfgoedbot/unused_monument_images.py
M erfgoedbot/update_id_dump.py
9 files changed, 65 insertions(+), 12 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/40/379140/1

diff --git a/erfgoedbot/add_coord_to_articles.py 
b/erfgoedbot/add_coord_to_articles.py
index 5a9950e..58fa56e 100644
--- a/erfgoedbot/add_coord_to_articles.py
+++ b/erfgoedbot/add_coord_to_articles.py
@@ -291,6 +291,7 @@
 def main():
 countrycode = u''
 lang = u''
+skip_wd = False
 connMon = None
 cursorMon = None
 
@@ -302,10 +303,12 @@
 countrycode = value
 elif option == '-langcode':
 lang = value
+elif option == '-skip_wd':
+skip_wd = True
 else:
 raise Exception(
-u'Bad parameters. Expected "-countrycode", "-langcode" or '
-u'pywikibot args. Found "{}"'.format(option))
+u'Bad parameters. Expected "-countrycode", "-langcode", '
+u'"-skip_wd" or pywikibot args. Found "{}"'.format(option))
 
 if countrycode and lang:
 if not mconfig.countries.get((countrycode, lang)):
@@ -318,6 +321,9 @@
 u'be used together.')
 else:
 for (countrycode, lang), countryconfig in 
mconfig.countries.iteritems():
+if (countryconfig.get('skip') or
+(skip_wd and (countryconfig.get('type') == 'sparql'))):
+continue
 pywikibot.output(u'Working on countrycode "%s" in language "%s"' % 
(countrycode, lang))
 processCountry(countrycode, lang, countryconfig, 
wikiData.get(lang), connMon, cursorMon)
 
diff --git a/erfgoedbot/add_object_location_monuments.py 
b/erfgoedbot/add_object_location_monuments.py
index 25816f4..ae780d9 100644
--- a/erfgoedbot/add_object_location_monuments.py
+++ b/erfgoedbot/add_object_location_monuments.py
@@ -206,6 +206,7 @@
 def main():
 countrycode = u''
 lang = u''
+skip_wd = False
 local_write = None
 
 # Connect database, we need that
@@ -218,12 +219,15 @@
 countrycode = value
 elif option == '-langcode':
 lang = value
+elif option == '-skip_wd':
+skip_wd = True
 elif option == '-local_write':
 local_write = value
 else:
 raise Exception(
 u'Bad parameters. Expected "-countrycode", "-langcode", '
-u'"-local_write" or pywikibot args. Found "{}"'.format(option))
+u'"-skip_wd", "-local_write" or pywikibot args. '
+u'Found "{}"'.format(option))
 
 pywikibot.setSite(pywikibot.getSite(u'commons', u'commons'))
 
@@ -241,6 +245,9 @@
 u'be used together.')
 else:
 for (countrycode, lang), countryconfig in 
mconfig.countries.iteritems():
+if (countryconfig.get('skip') or
+(skip_wd and (countryconfig.get('type') == 'sparql'))):
+continue
 if not countryconfig.get('autoGeocode'):
 pywikibot.output(
 u'"%s" in language "%s" is not supported in auto geocode 
mode (yet).' % (countrycode, lang))
diff --git a/erfgoedbot/categorize_images.py b/erfgoedbot/categorize_images.py
index 06c7b71..746dc59 100644
--- a/erfgoedbot/categorize_images.py
+++ b/erfgoedbot/categorize_images.py
@@ -531,6 +531,7 @@
 countrycode = u''
 lang = u''
 overridecat = u''
+skip_wd = False
 local_write = None
 conn = None
 cursor = None
@@ -545,13 +546,15 @@
 lang = value
 elif option == '-overridecat':
 overridecat = value
+elif option == '-skip_wd':
+skip_wd = True
 elif option == '-local_write':
 local_write = value
 else:
 raise Exception(
 u'Bad parameters. Expected "-countrycode", "-langcode", '
-u'"-overridecat", "-local_write" or pywikibot args. '
-u'Found "{}"'.format(option))
+u'"-overridecat", "-skip_wd", "-local_write" or pywikibot '
+  

[MediaWiki-commits] [Gerrit] labs...heritage[master]: Harvest the source page of unknown fields

2017-09-19 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/379126 )

Change subject: Harvest the source page of unknown fields
..

Harvest the source page of unknown fields

The UnknownFields report is as of now not actionable as it is very
hard to track down the location of the error. This stores all pages
that have a given field, and displays a subset of these as
wikilinks on the report in a third column.

Bug: T117330
Change-Id: Iad3517c0bce961317e493ffad8b34ee5c6f8e363
---
M erfgoedbot/update_database.py
M requirements-test.txt
M tests/test_update_database.py
3 files changed, 104 insertions(+), 10 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/26/379126/1

diff --git a/erfgoedbot/update_database.py b/erfgoedbot/update_database.py
index 11a8f83..08fc4d5 100755
--- a/erfgoedbot/update_database.py
+++ b/erfgoedbot/update_database.py
@@ -141,9 +141,10 @@
 
 def unknownFieldsStatistics(countryconfig, unknownFields, local_write):
 """
-Produce some unknown field statistics to debug.
+Outputs a list of any unknown fields as a wikitext table.
 
-This is still very raw data. Should be formatted and more information.
+The table contains the name and frequency of the field and a sample of
+source pages where this field was encountered.
 """
 site = pywikibot.Site(u'commons', u'commons')
 page = pywikibot.Page(
@@ -152,15 +153,42 @@
 summary = u'Updating the list of unknown fields'
 
 text = u'{| class="wikitable sortable"\n'
-text += u'! Field !! Count\n'
+text += u'! Field !! Count !! Sources\n'
 for key, value in unknownFields.items():
 text += u'|-\n'
-text += u'| %s || %s\n' % (key, value)
+text += u'| {0} || {1} || {2}\n'.format(
+key, value['count'], format_source_field(value['sources'], site))
 
 text += u'|}\n'
 text += u'[[Category:Commons:Monuments database/Unknown fields]]'
 
 common.save_to_wiki_or_local(page, summary, text, local_path=local_write)
+
+
+def format_source_field(sources, site, sample_size=4):
+"""
+Format a list of source pages to fit in the statistics field.
+
+@param sources: set of pywikibot.Page objects
+@param site: the site to which the output should be written (commons)
+@param sample_size: the number of source pages to output
+"""
+source_text = ''
+if len(sources) == 1:
+source_page = list(sources)[0]
+source_text = source_page.title(
+asLink=True, forceInterwiki=True, insite=site)
+else:
+source_slice = list(sources)[:sample_size]
+remaining = len(sources) - len(source_slice)
+for source_page in source_slice:
+source_text += u'\n* {0}'.format(
+source_page.title(
+asLink=True, forceInterwiki=True, insite=site))
+if remaining:
+source_text += u'\n* and {0} more page(s)'.format(remaining)
+
+return source_text
 
 
 def updateMonument(contents, source, countryconfig, conn, cursor, sourcePage):
@@ -338,8 +366,12 @@
 title, field, value),
 _logger)
 if field not in unknownFields:
-unknownFields[field] = 0
-unknownFields[field] += 1
+unknownFields[field] = {
+'count': 0,
+'sources': set()
+}
+unknownFields[field]['count'] += 1
+unknownFields[field]['sources'].add(sourcePage)
 # time.sleep(5)
 
 # If we truncate we don't have to check for primkey (it's a made up one)
diff --git a/requirements-test.txt b/requirements-test.txt
index 9f90b1c..ddd621b 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -5,3 +5,4 @@
 nose-cov
 coverage
 yamllint
+orderedset
diff --git a/tests/test_update_database.py b/tests/test_update_database.py
index 1145984..7435094 100644
--- a/tests/test_update_database.py
+++ b/tests/test_update_database.py
@@ -3,6 +3,7 @@
 import mock
 import unittest
 import pywikibot
+from orderedset import OrderedSet
 
 from erfgoedbot import update_database
 
@@ -49,13 +50,21 @@
 self.assertEqual(unknown_fields, {})
 
 def 
test_processMonument_with_one_unknown_param_correctly_returns_unknown_fields(self):
-params = [u'id=1234', u'name=A Monument Name', u'some_unknown_field=An 
unknown field value']
+params = [
+u'id=1234',
+u'name=A Monument Name',
+u'some_unknown_field=An unknown field value'
+]
 unknown_fields = {}
+expected_unknown = {'count': 1, 'sources': set([self.mock_page])}
 with self.assertRaises(update_database.NoPrimkeyException):
 update_database.processMonument(
 params, self.source, self.country_co

[MediaWiki-commits] [Gerrit] labs...heritage[master]: Add mechanism for storing wikipage locally instead of writin...

2017-09-18 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/378800 )

Change subject: Add mechanism for storing wikipage locally instead of writing 
to wiki
..

Add mechanism for storing wikipage locally instead of writing to wiki

This allows e.g. local (docker) testing of page saving outputs and unit testing
of report writing functions.

Right now the trigger for this is the command line argument
"-local_write:PATH" but a better solution would be
an environment variable detected by common.save_to_wiki_or_local().

Support for this command line argument is added to all erfgoedbot scripts
apart from add_coord_to_articles.py (which relies on user interaction).

Also:
* Minor cleanup of add_object_location to make linter cry less

Bug: T174614
Change-Id: I2bf650f99a57a0e93dbb0c3d6f520049c3579957
---
M README.md
M erfgoedbot/add_object_location_monuments.py
M erfgoedbot/categorize_images.py
M erfgoedbot/common.py
M erfgoedbot/database_statistics.py
M erfgoedbot/images_of_monuments_without_id.py
M erfgoedbot/missing_commonscat_links.py
M erfgoedbot/populate_image_table.py
M erfgoedbot/unused_monument_images.py
M erfgoedbot/update_database.py
M requirements.txt
M tests/test_common.py
12 files changed, 308 insertions(+), 98 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/00/378800/1

diff --git a/README.md b/README.md
index 50b5352..98f3ba0 100644
--- a/README.md
+++ b/README.md
@@ -22,9 +22,10 @@
 
 # Build and start the Docker containers
 docker-compose -f docker-compose-bot.yml up --build -d
+mkdir -p docker_pages
 
 # Run the bot to harvest a country
-docker-compose -f docker-compose-bot.yml run --rm bot python 
erfgoedbot/update_database.py -countrycode:ge -langcode:ka -log
+docker-compose -f docker-compose-bot.yml run --rm bot python 
erfgoedbot/update_database.py -countrycode:ge -langcode:ka -log  
-local_write:docker_pages
 
 # Update the monuments_all table
 docker-compose -f docker-compose-bot.yml run --rm db mysql -h db 
s51138__heritage_p --user=heritage --password=password < 
erfgoedbot/sql/fill_table_monuments_all.sql
diff --git a/erfgoedbot/add_object_location_monuments.py 
b/erfgoedbot/add_object_location_monuments.py
index 1ffc23f..25816f4 100644
--- a/erfgoedbot/add_object_location_monuments.py
+++ b/erfgoedbot/add_object_location_monuments.py
@@ -6,9 +6,9 @@
 
 '''
 import pywikibot
-from pywikibot import pagegenerators
 
 import monuments_config as mconfig
+import common as common
 from database_connection import (
 close_database_connection,
 connect_to_monuments_database,
@@ -16,7 +16,8 @@
 )
 
 
-def locateCountry(countrycode, lang, countryconfig, conn, cursor, conn2, 
cursor2):
+def locateCountry(countrycode, lang, countryconfig, conn, cursor, conn2,
+  cursor2, local_write):
 '''
 Locate images in a single country.
 '''
@@ -28,7 +29,7 @@
 locationTemplate = locateImage(
 page, monumentId, countrycode, lang, countryconfig, conn, cursor)
 if locationTemplate:
-addLocation(page, locationTemplate)
+addLocation(page, locationTemplate, local_write)
 
 
 def getMonumentsWithoutLocation(countryconfig, conn2, cursor2):
@@ -91,7 +92,10 @@
 # not already a template on the page.
 templates = page.templates()
 
-if u'Location' in page.templates() or u'Location dec' in page.templates() 
or u'Object location' in page.templates() or u'Object location dec' in 
page.templates():
+if (u'Location' in templates or
+u'Location dec' in templates or
+u'Object location' in templates or
+u'Object location dec' in templates):
 pywikibot.output(
 u'Location template already found at: %s' % page.title())
 return False
@@ -106,8 +110,6 @@
 '''
 Get coordinates from the erfgoed database
 '''
-result = None
-
 query = u"""SELECT lat, lon, source FROM monuments_all
 WHERE id=%s
 AND country=%s
@@ -126,7 +128,7 @@
 return False
 
 
-def addLocation(page, locationTemplate):
+def addLocation(page, locationTemplate, local_write):
 try:
 oldtext = page.get()
 except pywikibot.NoPage:
@@ -139,7 +141,8 @@
 newtext = putAfterTemplate(
 oldtext, u'Information', locationTemplate, loose=True)
 pywikibot.showDiff(oldtext, newtext)
-page.put(newtext, comment)
+common.save_to_wiki_or_local(page, comment, newtext,
+ local_path=local_write)
 
 
 def putAfterTemplate(oldtext, template, toadd, loose=True):
@@ -191,7 +194,7 @@
 newtext = oldtext
 cats = pywikibot.getCategoryLinks(newtext)
 ll = pywikibot.getLanguageLinks(newtext)
-nextext = pywikibot.removeLanguageLinks(newtext)
+newtext = pywikibot.removeLanguageLinks(newtext)
 newtext = pywikibot.removeCategoryLinks(newte

[MediaWiki-commits] [Gerrit] labs...heritage[master]: [WIP] Ensure skipped image categorizations are mentioned in ...

2017-09-17 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/378641 )

Change subject: [WIP] Ensure skipped image categorizations are mentioned in 
stats
..

[WIP] Ensure skipped image categorizations are mentioned in stats

This is a follow-up to b3c3ab031cc669e2747c110b0f9efc00493b86a9 which
blacklisted some countries from image categorization.

Also restructures how results are passed around, getting rid of some
previous hacks.

WIP because:
* Need to check that this works

Bug: T174871
Change-Id: I9a3cfebcf640da007088554e1de09828b9e76aa1
---
M erfgoedbot/categorize_images.py
1 file changed, 86 insertions(+), 30 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/41/378641/1

diff --git a/erfgoedbot/categorize_images.py b/erfgoedbot/categorize_images.py
index 1705d56..5ff1768 100644
--- a/erfgoedbot/categorize_images.py
+++ b/erfgoedbot/categorize_images.py
@@ -402,16 +402,20 @@
 raise NoCommonsCatFromWikidataItemException(page)
 
 
-def processCountry(countrycode, lang, countryconfig, commonsCatTemplates, 
conn, cursor, overridecat=None):
-'''
-Work on a single country.
-'''
+def processCountry(countrycode, lang, countryconfig, commonsCatTemplates,
+   conn, cursor, overridecat=None):
+"""Work on a single country."""
 if not countryconfig.get('commonsTemplate'):
 # No template found, just skip silently.
-basecat = u''
+basecat = None
 if countryconfig.get('commonsCategoryBase'):
 basecat = countryconfig.get('commonsCategoryBase')
-return (countrycode, lang, basecat, 0, 0, 0)
+return {
+'code': countrycode,
+'lang': lang,
+'cat': basecat,
+'cmt': 'skipped: no template'
+}
 
 if (not commonsCatTemplates):
 # No commonsCatTemplates found, just skip.
@@ -427,10 +431,9 @@
 commonsTemplate = countryconfig.get('commonsTemplate')
 harvest_type = countryconfig.get('type')
 
-if overridecat:
-commonsCategoryBase = pywikibot.Category(site, "%s:%s" % 
(site.namespace(14), overridecat))
-else:
-commonsCategoryBase = pywikibot.Category(site, "%s:%s" % 
(site.namespace(14), countryconfig.get('commonsCategoryBase')))
+category_name = overridecat or countryconfig.get('commonsCategoryBase')
+commonsCategoryBase = pywikibot.Category(
+site, "%s:%s" % (site.namespace(14), category_name))
 
 generator = pagegenerators.CategorizedPageGenerator(commonsCategoryBase)
 
@@ -447,39 +450,76 @@
 if success:
 categorizedImages += 1
 
-return (countrycode, lang, commonsCategoryBase.title(withNamespace=False), 
commonsTemplate, totalImages, categorizedImages)
+return {
+'code': countrycode,
+'lang': lang,
+'cat': commonsCategoryBase.title(withNamespace=False),
+'template': commonsTemplate,
+'total_images': totalImages,
+'cat_images': categorizedImages
+}
 
 
 def outputStatistics(statistics):
-'''
-Output the results of the bot as a nice wikitable
-'''
-output = u'{| class="wikitable sortable"\n'
-output += \
-u'! country !! [[:en:List of ISO 639-1 codes|lang]] !! Base category 
!! Template !! data-sort-type="number"|Total images !! 
data-sort-type="number"|Categorized images !! data-sort-type="number"|Images 
left !! data-sort-type="number"|Current image count\n'
+"""Output the results of the bot as a nice wikitable."""
+output = (
+u'{| class="wikitable sortable"\n'
+u'! country '
+u'!! [[:en:List of ISO 639-1 codes|lang]] '
+u'!! Base category '
+u'!! Template '
+u'!! data-sort-type="number"|Total images '
+u'!! data-sort-type="number"|Categorized images '
+u'!! data-sort-type="number"|Images left '
+u'!! data-sort-type="number"|Current image count'
+u'\n')
+
+output_row = (
+u'|-\n'
+u'|| {code} \n'
+u'|| {lang} \n'
+u'|| {cat} \n'
+u'|| {template} \n'
+u'|| {total_images} \n'
+u'|| {cat_images} \n'
+u'|| {leftover} \n'
+u'|| {pages_in_cat} \n')
 
 totalImages = 0
 categorizedImages = 0
 leftoverImages = 0
 
 for row in statistics:
-output += u'|-\n'
-output += u'|| %s \n' % (row[0],)
-output += u'|| %s \n' % (row[1],)
-output += u'|| [[:Category:%s]] \n' % (row[2],)
-output += u'|| {{tl|%s}} \n' % (row[3],)
 
-totalImages += row[4]
-output += u'|| %s \n' % (row[4],)
+leftover = '---'
+cat_link = '---'
+pages_in_cat = '---'
+template_link = '---'
+total_images = row.get('total_images') or '---'
+cat_images_or_cmt = row.get('cat_images') or row.get('cmt')
 
-categorizedImages += row[5

[MediaWiki-commits] [Gerrit] labs...heritage[master]: Ensure all added categories are pywikibot.Category objects

2017-09-13 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/377927 )

Change subject: Ensure all added categories are pywikibot.Category objects
..

Ensure all added categories are pywikibot.Category objects

Also enumerate third category acquisition method

Bug: T175839
Change-Id: Ibf92088b1e60c6d808f1a79c78c0fe3d2165fb20
---
M erfgoedbot/categorize_images.py
1 file changed, 16 insertions(+), 6 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/27/377927/1

diff --git a/erfgoedbot/categorize_images.py b/erfgoedbot/categorize_images.py
index 2566416..d94e4cc 100644
--- a/erfgoedbot/categorize_images.py
+++ b/erfgoedbot/categorize_images.py
@@ -282,12 +282,13 @@
 
 
 def get_categories_from_source_page(page, commonsCatTemplates):
-'''
+"""
 Get Commons categories based on page.
+
 1. If page contains a Commonscat template, use that category
 2. Else, try getting it from Wikidata
 3. Else pull Commonscat links from upper categories
-'''
+"""
 new_categories = set()
 categorisation_method = ''
 for commonsCatTemplateName in commonsCatTemplates:
@@ -298,7 +299,10 @@
 categorisation_method = 'C1: CommonsCat on the monument list page'
 if not len(new_categories):
 try:
-new_categories.add(get_Commons_category_via_Wikidata(page))
+site = pywikibot.Site(u'commons', u'commons')
+cat_title = get_Commons_category_via_Wikidata(page)
+cat = pywikibot.Category(site, cat_title)
+new_categories.add(cat)
 categorisation_method = 'C2: via Wikidata on the monument list 
page'
 except NoCommonsCatFromWikidataItemException:
 pass
@@ -328,9 +332,14 @@
 
 
 def getCategoryFromCommonscat(page, commonsCatTemplates):
-'''
-Get a Commons category based on a page with a Commonscat template
-'''
+"""
+Get a Commons category based on a page.
+
+1. Get category from commonscat template on page
+2. Get category from commonscat property or Commons category sitelink on
+   the Wikidata object corresponding to the page
+3. Get category with same name as page
+"""
 cat_title = None
 categorisation_method = '1'  # By 'default', we do not rely on Wikidata
 (template, params) = get_commonscat_template_in_page(page, 
commonsCatTemplates)
@@ -346,6 +355,7 @@
 pass
 
 if not cat_title:
+categorisation_method = '3'
 cat_title = page.title(withNamespace=False)
 
 site = pywikibot.Site(u'commons', u'commons')

-- 
To view, visit https://gerrit.wikimedia.org/r/377927
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ibf92088b1e60c6d808f1a79c78c0fe3d2165fb20
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] labs...heritage[master]: Include tools directory in php linting

2017-09-13 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/377921 )

Change subject: Include tools directory in php linting
..

Include tools directory in php linting

Includes the tools directory (with the exception of the browser
sub-directory) in the php linting.

Also fixes any violations of the linting rules.

Change-Id: I758dd5a83beea050c4ee9d1ea2f1547770b7cf6c
---
M phpcs.xml
M tools/export_as_text.php
M tools/id_checker.php
M tools/rankings.php
M tools/stress-test.php
M tools/wlm-latest.php
M tools/wlm/gallery-all.php
M tools/wlmlast.php
8 files changed, 397 insertions(+), 399 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/21/377921/1

diff --git a/phpcs.xml b/phpcs.xml
index 1c220f6..e23f03e 100644
--- a/phpcs.xml
+++ b/phpcs.xml
@@ -1,6 +1,6 @@
 
 
-
+   



@@ -11,14 +11,14 @@



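-<file>.</file>
-
-
-<exclude-pattern>node_modules</exclude-pattern>
-<exclude-pattern>vendor</exclude-pattern>
-<exclude-pattern>api/tripplanner</exclude-pattern>
-<exclude-pattern>common</exclude-pattern>
-<exclude-pattern>map</exclude-pattern>
-<exclude-pattern>prox_search/cls</exclude-pattern>
-<exclude-pattern>tools</exclude-pattern>
+   <file>.</file>
+   
+   
+   <exclude-pattern>node_modules</exclude-pattern>
+   <exclude-pattern>vendor</exclude-pattern>
+   <exclude-pattern>api/tripplanner</exclude-pattern>
+   <exclude-pattern>common</exclude-pattern>
+   <exclude-pattern>map</exclude-pattern>
+   <exclude-pattern>prox_search/cls</exclude-pattern>
+   <exclude-pattern>tools/browser</exclude-pattern>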
 
diff --git a/tools/export_as_text.php b/tools/export_as_text.php
index 579bd10..3b75b0f 100644
--- a/tools/export_as_text.php
+++ b/tools/export_as_text.php
@@ -1,5 +1,5 @@
 fetch_assoc() ) {
$columns = 0;
-   foreach( $row as $value ) {
+   foreach ( $row as $value ) {
if ( $columns++ ) {
echo "\t";
}
@@ -41,7 +41,6 @@
 }
 
 mysqli_close( $db );
-
 
 /**
  * Helper function for handling MySQL errors
diff --git a/tools/id_checker.php b/tools/id_checker.php
index 964ab11..07c27b4 100644
--- a/tools/id_checker.php
+++ b/tools/id_checker.php
@@ -6,7 +6,6 @@
 require_once dirname( __DIR__ ) . '/api/common.php';
 require_once ( '/api/includes/CommonFunctions.php' );
 
-
 $db = Database::getDb();
 print '';
 print '
diff --git a/tools/rankings.php b/tools/rankings.php
index 85ad92a..73479e8 100644
--- a/tools/rankings.php
+++ b/tools/rankings.php
@@ -3,288 +3,288 @@
  * By default from 
http://commons.wikimedia.org/wiki/Category:Images_from_Wiki_Loves_Monuments_2011
  * if country if given, one of the subcategories.
  */
-header("Cache-Control: no-cache, must-revalidate");
-header("Expires: Thu, 01 Jan 1970 00:00:00 GMT");
-header('Content-type: text/html;; charset=utf-8');
+header( "Cache-Control: no-cache, must-revalidate" );
+header( "Expires: Thu, 01 Jan 1970 00:00:00 GMT" );
+header( 'Content-type: text/html;; charset=utf-8' );
 ?>
 
-
-
-* {
-margin:0;
-padding:0;
-}
-#photos-wlm {
-margin: 0;
-padding: 0;
-}
+   
+