[MediaWiki-commits] [Gerrit] Add monitoring of datafiles - change (analytics/geowiki)
Milimetric has submitted this change and it was merged. Change subject: Add monitoring of datafiles .. Add monitoring of datafiles Change-Id: I1d4693c991a7fae87fb081725f246fb4050ddbed --- M scripts/check_web_page.sh 1 file changed, 627 insertions(+), 1 deletion(-) Approvals: Milimetric: Verified; Looks good to me, approved diff --git a/scripts/check_web_page.sh b/scripts/check_web_page.sh index 3e17282..5c70e80 100755 --- a/scripts/check_web_page.sh +++ b/scripts/check_web_page.sh @@ -32,6 +32,17 @@ URL_BASE_DASHBOARD="$URL_BASE/dashboards" URL_BASE_GRAPH="$URL_BASE/graphs" URL_BASE_DATASOURCE="$URL_BASE/datasources" +URL_BASE_CSV="$URL_BASE/data/datafiles/gp" + +# Wikis with many active editors. +# For those wikis, we allow less deviation from linear +# extrapolated values for the column with maximum active editors. +WIKIS_MANY_ACTIVE_EDITORS=( ar az be be_x_old bg bn ca cs da de el en eo es et eu fa fi gl he hi hr hu hy id is it ja ka kk kn ko lt lv mk ml mn mr ms nl nn no pl pt ro ru sh simple sk sl sr sq sv ta te th tl tr uk vi zh zh_yue ) + +# Wikis with hardly any active editors. +# For those wikis, linear extrapolation does not make sense. However, +# we check whether the needed files exist. 
+WIKIS_HARDLY_ACTIVE_EDITORS=( ab ace af ak als am an ang arc arz as ast av ay ba bar bat_smg bcl bh bi bjn bm bo bpy br bs bug bxr cbk_zam cdo ce ceb ch chr chy ckb co cr crh csb cu cv cy diq dsb dv dz ee eml ext ff fiu_vro fj fo frp frr fur fy ga gan gd glk gn got gu gv ha hak haw hif hsb ht ia ie ig ik ilo io iu jbo jv kaa kab kbd kg ki kl km koi krc ks ksh ku kv kw ky la lad lb lbe lez lg li lij lmo ln lo ltg map_bms mdf mg mhr mi mrj mt mwl my myv mzn na nah nap nds nds_nl ne new nov nrm nso nv ny oc om or os pa pag pam pap pcd pdc pi pih pms pnb pnt ps qu rm rmy rn roa_rup roa_tara rue rw sa sah sc scn sco sd se sg si sm sn so srn ss st stq su sw szl tet tg ti tk tn to tpi ts tt tum tw ty udm ug ur uz ve vec vep vls vo wa war wo wuu xal xh yi yo za zea zh_classical zh_min_nan zu ) # Some files have a last data point that is further back. Either # because the wiki ran out of active editors, or the file has to be @@ -39,11 +50,36 @@ # override the default expected date (i.e.: current day) of the last # data point of a file. 
declare -A EXPECTED_LAST_DATE_OVERRIDE +EXPECTED_LAST_DATE_OVERRIDE["bm_top10"]="2013-08-23" +EXPECTED_LAST_DATE_OVERRIDE["ee_top10"]="2013-08-13" +EXPECTED_LAST_DATE_OVERRIDE["ff_top10"]="2013-08-13" EXPECTED_LAST_DATE_OVERRIDE["global_south_editor_fractions"]="2013-08-20" EXPECTED_LAST_DATE_OVERRIDE["grants_count_by_global_south"]="2013-06-01" EXPECTED_LAST_DATE_OVERRIDE["grants_count_by_program"]="2013-06-01" EXPECTED_LAST_DATE_OVERRIDE["grants_spending_by_global_south"]="2013-06-01" EXPECTED_LAST_DATE_OVERRIDE["grants_spending_by_program"]="2013-06-01" +EXPECTED_LAST_DATE_OVERRIDE["lg_all"]="2013-08-09" +EXPECTED_LAST_DATE_OVERRIDE["lg_top10"]="2013-06-20" +EXPECTED_LAST_DATE_OVERRIDE["pnt_all"]="2013-08-15" +EXPECTED_LAST_DATE_OVERRIDE["pnt_top10"]="2013-08-01" +EXPECTED_LAST_DATE_OVERRIDE["rn_all"]="2013-08-27" +EXPECTED_LAST_DATE_OVERRIDE["rn_top10"]="2013-08-27" +EXPECTED_LAST_DATE_OVERRIDE["roa_rup_top10"]="2013-09-07" +EXPECTED_LAST_DATE_OVERRIDE["sg_all"]="2013-09-05" +EXPECTED_LAST_DATE_OVERRIDE["sg_top10"]="2013-08-29" +EXPECTED_LAST_DATE_OVERRIDE["sm_all"]="2013-09-07" +EXPECTED_LAST_DATE_OVERRIDE["sm_top10"]="2013-09-07" +EXPECTED_LAST_DATE_OVERRIDE["st_top10"]="2013-09-06" +EXPECTED_LAST_DATE_OVERRIDE["ti_all"]="2013-09-07" +EXPECTED_LAST_DATE_OVERRIDE["ti_top10"]="2013-08-23" +EXPECTED_LAST_DATE_OVERRIDE["to_top10"]="2013-09-05" +EXPECTED_LAST_DATE_OVERRIDE["ts_top10"]="2013-08-26" +EXPECTED_LAST_DATE_OVERRIDE["tum_all"]="2013-08-06" +EXPECTED_LAST_DATE_OVERRIDE["tum_top10"]="2013-08-02" +EXPECTED_LAST_DATE_OVERRIDE["tw_top10"]="2013-08-31" +EXPECTED_LAST_DATE_OVERRIDE["ve_all"]="2013-07-31" +EXPECTED_LAST_DATE_OVERRIDE["ve_top10"]="2013-06-07" +EXPECTED_LAST_DATE_OVERRIDE["xh_top10"]="2013-08-13" # Set DEBUG to "local" to download files into /tmp and use those copies # instead of fetching the files again and again for each run. 
Files do @@ -188,6 +224,64 @@ } #--- +# Downloads a csv file and does some basic checks on the file +# +# It is assured that +# * the file's last line is for the expected date, +# * each line of the file has the same number of columns, and +# * (for stubs != "global_south_editor_fractions") that the file has +# at least 10 columns. +# +# Input: +# $1 - The csv's stub to download. The URL to download is generated +# from this stub. E.g.: 'global_south', 'de_top10'. +# +# Output: +# DOWNLOADED_FILE_ABS - The absolute name of the file in which the +# URL's content can be found. Do not modify this file, as it may +# be reused for different runs, when in D
[MediaWiki-commits] [Gerrit] Add monitoring of datafiles - change (analytics/geowiki)
QChris has uploaded a new change for review. https://gerrit.wikimedia.org/r/83813 Change subject: Add monitoring of datafiles .. Add monitoring of datafiles Change-Id: I1d4693c991a7fae87fb081725f246fb4050ddbed --- M scripts/check_web_page.sh 1 file changed, 627 insertions(+), 1 deletion(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/geowiki refs/changes/13/83813/1 diff --git a/scripts/check_web_page.sh b/scripts/check_web_page.sh index 3e17282..5c70e80 100755 --- a/scripts/check_web_page.sh +++ b/scripts/check_web_page.sh @@ -32,6 +32,17 @@ URL_BASE_DASHBOARD="$URL_BASE/dashboards" URL_BASE_GRAPH="$URL_BASE/graphs" URL_BASE_DATASOURCE="$URL_BASE/datasources" +URL_BASE_CSV="$URL_BASE/data/datafiles/gp" + +# Wikis with many active editors. +# For those wikis, we allow less deviation from linear +# extrapolated values for the column with maximum active editors. +WIKIS_MANY_ACTIVE_EDITORS=( ar az be be_x_old bg bn ca cs da de el en eo es et eu fa fi gl he hi hr hu hy id is it ja ka kk kn ko lt lv mk ml mn mr ms nl nn no pl pt ro ru sh simple sk sl sr sq sv ta te th tl tr uk vi zh zh_yue ) + +# Wikis with hardly any active editors. +# For those wikis, linear extrapolation does not make sense. However, +# we check whether the needed files exist. 
+WIKIS_HARDLY_ACTIVE_EDITORS=( ab ace af ak als am an ang arc arz as ast av ay ba bar bat_smg bcl bh bi bjn bm bo bpy br bs bug bxr cbk_zam cdo ce ceb ch chr chy ckb co cr crh csb cu cv cy diq dsb dv dz ee eml ext ff fiu_vro fj fo frp frr fur fy ga gan gd glk gn got gu gv ha hak haw hif hsb ht ia ie ig ik ilo io iu jbo jv kaa kab kbd kg ki kl km koi krc ks ksh ku kv kw ky la lad lb lbe lez lg li lij lmo ln lo ltg map_bms mdf mg mhr mi mrj mt mwl my myv mzn na nah nap nds nds_nl ne new nov nrm nso nv ny oc om or os pa pag pam pap pcd pdc pi pih pms pnb pnt ps qu rm rmy rn roa_rup roa_tara rue rw sa sah sc scn sco sd se sg si sm sn so srn ss st stq su sw szl tet tg ti tk tn to tpi ts tt tum tw ty udm ug ur uz ve vec vep vls vo wa war wo wuu xal xh yi yo za zea zh_classical zh_min_nan zu ) # Some files have a last data point that is further back. Either # because the wiki ran out of active editors, or the file has to be @@ -39,11 +50,36 @@ # override the default expected date (i.e.: current day) of the last # data point of a file. 
declare -A EXPECTED_LAST_DATE_OVERRIDE +EXPECTED_LAST_DATE_OVERRIDE["bm_top10"]="2013-08-23" +EXPECTED_LAST_DATE_OVERRIDE["ee_top10"]="2013-08-13" +EXPECTED_LAST_DATE_OVERRIDE["ff_top10"]="2013-08-13" EXPECTED_LAST_DATE_OVERRIDE["global_south_editor_fractions"]="2013-08-20" EXPECTED_LAST_DATE_OVERRIDE["grants_count_by_global_south"]="2013-06-01" EXPECTED_LAST_DATE_OVERRIDE["grants_count_by_program"]="2013-06-01" EXPECTED_LAST_DATE_OVERRIDE["grants_spending_by_global_south"]="2013-06-01" EXPECTED_LAST_DATE_OVERRIDE["grants_spending_by_program"]="2013-06-01" +EXPECTED_LAST_DATE_OVERRIDE["lg_all"]="2013-08-09" +EXPECTED_LAST_DATE_OVERRIDE["lg_top10"]="2013-06-20" +EXPECTED_LAST_DATE_OVERRIDE["pnt_all"]="2013-08-15" +EXPECTED_LAST_DATE_OVERRIDE["pnt_top10"]="2013-08-01" +EXPECTED_LAST_DATE_OVERRIDE["rn_all"]="2013-08-27" +EXPECTED_LAST_DATE_OVERRIDE["rn_top10"]="2013-08-27" +EXPECTED_LAST_DATE_OVERRIDE["roa_rup_top10"]="2013-09-07" +EXPECTED_LAST_DATE_OVERRIDE["sg_all"]="2013-09-05" +EXPECTED_LAST_DATE_OVERRIDE["sg_top10"]="2013-08-29" +EXPECTED_LAST_DATE_OVERRIDE["sm_all"]="2013-09-07" +EXPECTED_LAST_DATE_OVERRIDE["sm_top10"]="2013-09-07" +EXPECTED_LAST_DATE_OVERRIDE["st_top10"]="2013-09-06" +EXPECTED_LAST_DATE_OVERRIDE["ti_all"]="2013-09-07" +EXPECTED_LAST_DATE_OVERRIDE["ti_top10"]="2013-08-23" +EXPECTED_LAST_DATE_OVERRIDE["to_top10"]="2013-09-05" +EXPECTED_LAST_DATE_OVERRIDE["ts_top10"]="2013-08-26" +EXPECTED_LAST_DATE_OVERRIDE["tum_all"]="2013-08-06" +EXPECTED_LAST_DATE_OVERRIDE["tum_top10"]="2013-08-02" +EXPECTED_LAST_DATE_OVERRIDE["tw_top10"]="2013-08-31" +EXPECTED_LAST_DATE_OVERRIDE["ve_all"]="2013-07-31" +EXPECTED_LAST_DATE_OVERRIDE["ve_top10"]="2013-06-07" +EXPECTED_LAST_DATE_OVERRIDE["xh_top10"]="2013-08-13" # Set DEBUG to "local" to download files into /tmp and use those copies # instead of fetching the files again and again for each run. 
Files do @@ -188,6 +224,64 @@ } #--- +# Downloads a csv file and does some basic checks on the file +# +# It is assured that +# * the file's last line is for the expected date, +# * each line of the file has the same number of columns, and +# * (for stubs != "global_south_editor_fractions") that the file has +# at least 10 columns. +# +# Input: +# $1 - The csv's stub to download. The URL to download is generated +# from this stub. E.g.: 'global_south', 'de_top10'. +# +# Output: +# DOWNLOADED_FILE_ABS - The absolute name of the file in which the +# URL's content can be found. Do not modify this file, as it