Joal has uploaded a new change for review:
https://gerrit.wikimedia.org/r/360866
Change subject: Add two tables to sqoop on hadoop
......................................................................
Add two tables to sqoop on hadoop
The pagelinks and redirect tables are needed for the production run
of the clickstream dataset.
Change-Id: I97455bd3779906d7228595e10c2e134b39720b42
---
M bin/sqoop-mediawiki-tables
1 file changed, 25 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery refs/changes/66/360866/1
diff --git a/bin/sqoop-mediawiki-tables b/bin/sqoop-mediawiki-tables
index a2571ae..1ef30c2 100755
--- a/bin/sqoop-mediawiki-tables
+++ b/bin/sqoop-mediawiki-tables
@@ -227,6 +227,31 @@
])),
}
+ queries['pagelinks'] = {
+ 'query': '''
+ select pl_from,
+ pl_namespace,
+ convert(pl_title using utf8) pl_title,
+ pl_from_namespace
+
+ from pagelinks
+ where $CONDITIONS
+ ''',
+ }
+
+ queries['redirect'] = {
+ 'query': '''
+ select rd_from,
+ rd_namespace,
+ convert(rd_title using utf8) rd_title,
+ convert(rd_interwiki using utf8) rd_interwiki,
+ convert(rd_fragment using utf8) rd_fragment
+
+ from redirect
+ where $CONDITIONS
+ ''',
+ }
+
queries['revision'] = {
'query': '''
select rev_id,
--
To view, visit https://gerrit.wikimedia.org/r/360866
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I97455bd3779906d7228595e10c2e134b39720b42
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Joal <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits