DCausse has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/384989 )
Change subject: Fetch inner hits and only the first page
......................................................................
Fetch inner hits and only the first page
Change-Id: Ifc2dcb24111bfececa5c448f886f2db3a2b39aff
---
M oozie/query_clicks/hourly/query_clicks_hourly.hql
1 file changed, 5 insertions(+), 2 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/analytics refs/changes/89/384989/1
diff --git a/oozie/query_clicks/hourly/query_clicks_hourly.hql b/oozie/query_clicks/hourly/query_clicks_hourly.hql
index 8b825b5..a9099e5 100644
--- a/oozie/query_clicks/hourly/query_clicks_hourly.hql
+++ b/oozie/query_clicks/hourly/query_clicks_hourly.hql
@@ -56,6 +56,7 @@
ADD JAR
hdfs://analytics-hadoop/user/ebernhardson/refinery-hive-0.0.39-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION get_pageview_info AS
'org.wikimedia.analytics.refinery.hive.GetPageviewInfoUDF';
CREATE TEMPORARY FUNCTION get_main_search_request AS
'org.wikimedia.analytics.refinery.hive.GetMainSearchRequestUDF';
+CREATE TEMPORARY FUNCTION get_main_search_request_index AS
'org.wikimedia.analytics.refinery.hive.GetMainSearchRequestIndexUDF';
-- Generate row_timestamp, start_timestamp and end_timestamp variables to allow
-- requesting the specified year/month/day/hour, and the following hour, from the
@@ -156,9 +157,11 @@
-- Make sure we only extract from content index
AND SIZE(get_main_search_request(csrs.wikiid, csrs.requests).indices)
== 1
AND get_main_search_request(csrs.wikiid, csrs.requests).indices[0]
LIKE '%_content'
+ -- Only fetch first page for simplicity
+ AND get_main_search_request(csrs.wikiid, csrs.requests).hitsoffset = 0
-- We only want 'normal' requests here. if the user requested more than
-- the default 20 results filter them out
- AND SIZE(csrs.hits) <= 20
+ AND SIZE(get_main_search_request(csrs.wikiid, csrs.requests).hits) <=
20
)
INSERT OVERWRITE TABLE
@@ -174,7 +177,7 @@
search_req.timestamp,
search_req.wikiid,
search_req.project,
- search_req.hits,
+ get_main_search_request(search_req.requests).hits,
web_req.clicks
FROM
search_req
--
To view, visit https://gerrit.wikimedia.org/r/384989
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ifc2dcb24111bfececa5c448f886f2db3a2b39aff
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/analytics
Gerrit-Branch: master
Gerrit-Owner: DCausse <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits