Milimetric has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/185239

Change subject: Speed up verification with scripts
......................................................................

Speed up verification with scripts

Change-Id: I3d7fb6749632423b4b28799486f103881a1c3f73
---
M .gitignore
A scripts/test-data-loading.sql
2 files changed, 77 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/data-warehouse 
refs/changes/39/185239/1

diff --git a/.gitignore b/.gitignore
index 1006ba8..5086269 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+*.swp
 *.pyc
 *.egg
 *egg-info
diff --git a/scripts/test-data-loading.sql b/scripts/test-data-loading.sql
new file mode 100644
index 0000000..e55181a
--- /dev/null
+++ b/scripts/test-data-loading.sql
@@ -0,0 +1,76 @@
+-- period of time to check for
+-- set @to     = '20140829000100';
+set @from   = '20140829000000';
+set @to     = '20140903000000';
+
+
+-- edit fact matches source revision and archive table
+ select etl.wiki,
+        etl.k                           as date_with_bad_match,
+        etl.value                       as value_in_warehouse,
+        coalesce(en.value, de.value)    as value_in_original_db
+
+   from (select wiki,
+                date_format(time, '%Y%m%d') as k,
+                count(*) as value
+           from warehouse.edit
+          where time between @from and @to
+            and wiki in ('dewiki', 'enwiki')
+          group by wiki, k
+        ) etl
+
+            left join
+
+        (select 'dewiki' as wiki,
+                left(time, 8) as k,
+                archived,
+                count(*) as value
+           from (select rev_timestamp as time,
+                        0 as archived
+                   from dewiki.revision
+                  where rev_timestamp between @from and @to
+
+                  union all
+
+                 select ar_timestamp as time,
+                        1 as archived
+                   from dewiki.archive
+                  where ar_timestamp between @from and @to
+                ) all_revisions
+          group by wiki, k
+        ) de            on de.k = etl.k
+                       and de.value <> etl.value
+                       and de.wiki = etl.wiki
+
+            left join
+
+        (select 'enwiki' as wiki,
+                left(time, 8) as k,
+                archived,
+                count(*) as value
+           from (select rev_timestamp as time,
+                        0 as archived
+                   from enwiki.revision
+                  where rev_timestamp between @from and @to
+
+                  union all
+
+                 select ar_timestamp as time,
+                        1 as archived
+                   from enwiki.archive
+                  where ar_timestamp between @from and @to
+                ) all_revisions
+          group by wiki, k
+        ) en            on en.k = etl.k
+                       and en.value <> etl.value
+                       and en.wiki = etl.wiki
+
+  where de.value is not null
+     or en.value is not null
+;
+
+
+
+-- user dimension matches source user table
+
+-- page dimension matches source page table

-- 
To view, visit https://gerrit.wikimedia.org/r/185239
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I3d7fb6749632423b4b28799486f103881a1c3f73
Gerrit-PatchSet: 1
Gerrit-Project: analytics/data-warehouse
Gerrit-Branch: master
Gerrit-Owner: Milimetric <dandree...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to