Milimetric has uploaded a new change for review. https://gerrit.wikimedia.org/r/185239
Change subject: Speed up verification with scripts ...................................................................... Speed up verification with scripts Change-Id: I3d7fb6749632423b4b28799486f103881a1c3f73 --- M .gitignore A scripts/test-data-loading.sql 2 files changed, 77 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/data-warehouse refs/changes/39/185239/1 diff --git a/.gitignore b/.gitignore index 1006ba8..5086269 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*.swp *.pyc *.egg *egg-info diff --git a/scripts/test-data-loading.sql b/scripts/test-data-loading.sql new file mode 100644 index 0000000..e55181a --- /dev/null +++ b/scripts/test-data-loading.sql @@ -0,0 +1,76 @@ +-- period of time to check for +-- set @to = '20140829000100'; +set @from = '20140829000000'; +set @to = '20140903000000'; + + +-- edit fact matches source revision and archive table + select etl.wiki, + etl.k as date_with_bad_match, + etl.value as value_in_warehouse, + coalesce(en.value, de.value) as value_in_original_db + + from (select wiki, + date_format(time, '%Y%m%d') as k, + count(*) as value + from warehouse.edit + where time between @from and @to + and wiki in ('dewiki', 'enwiki') + group by wiki, k + ) etl + + left join + + (select 'dewiki' as wiki, + left(time, 8) as k, + archived, + count(*) as value + from (select rev_timestamp as time, + 0 as archived + from dewiki.revision + where rev_timestamp between @from and @to + + union all + + select ar_timestamp as time, + 1 as archived + from dewiki.archive + where ar_timestamp between @from and @to + ) all_revisions + group by wiki, k + ) de on de.k = etl.k + and de.value <> etl.value + and de.wiki = etl.wiki + + left join + + (select 'enwiki' as wiki, + left(time, 8) as k, + archived, + count(*) as value + from (select rev_timestamp as time, + 0 as archived + from enwiki.revision + where rev_timestamp between @from and @to + + union all + + select ar_timestamp as time, + 1 as archived + from enwiki.archive + where ar_timestamp between @from and @to + ) all_revisions + group by wiki, k + ) en on en.k = etl.k + and en.value <> etl.value + and en.wiki = etl.wiki + + where de.value is not null + or en.value is not null +; + + + +-- user dimension matches source user table + +-- page dimension matches source page table -- To view, visit https://gerrit.wikimedia.org/r/185239 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I3d7fb6749632423b4b28799486f103881a1c3f73 Gerrit-PatchSet: 1 Gerrit-Project: analytics/data-warehouse Gerrit-Branch: master Gerrit-Owner: Milimetric <dandree...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits