QChris has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/117321

Change subject: Since mobile jobs have been turned off, remove now unneeded pig 
scripts
......................................................................

Since mobile jobs have been turned off, remove now unneeded pig scripts

The output of the mobile jobs seems to be unused [1], and the jobs have
hence been turned off. We remove the corresponding (now unused) pig
scripts, as they depend on dclass and would be in the way of the
upcoming removal of dclass.

[1] http://lists.wikimedia.org/pipermail/analytics/2014-March/001662.html

Change-Id: I13caedc40b43d01d005c299bd9e3088c1ba603c4
---
D pig/mobile_device_props.pig
D pig/mobile_platform.pig
2 files changed, 0 insertions(+), 152 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/kraken 
refs/changes/21/117321/1

diff --git a/pig/mobile_device_props.pig b/pig/mobile_device_props.pig
deleted file mode 100644
index 8ecaa01..0000000
--- a/pig/mobile_device_props.pig
+++ /dev/null
@@ -1,60 +0,0 @@
-REGISTER 'geoip-1.2.9-patch-2-SNAPSHOT.jar'
-REGISTER 'kraken-generic-0.0.2-SNAPSHOT-jar-with-dependencies.jar'
-REGISTER 'kraken-dclass-0.0.2-SNAPSHOT.jar'
-REGISTER 'kraken-pig-0.0.2-SNAPSHOT.jar'
-
--- Script Parameters: pass via -p param_name=param_value, ex: -p 
date_bucket_regex=2013-03-24_00
-%default date_bucket_format 'yyyy-MM-dd_HH';    -- Format applied to 
timestamps for aggregation into buckets. Default: hourly.
-%default date_bucket_regex '.*';                -- Regex used to filter the 
formatted date_buckets; must match whole line. Default: no filtering.
-
-DEFINE DATE_BUCKET  
org.wikimedia.analytics.kraken.pig.ConvertDateFormat('yyyy-MM-dd\'T\'HH:mm:ss', 
'$date_bucket_format');
-DEFINE DCLASS       org.wikimedia.analytics.kraken.pig.UserAgentClassifier();
-DEFINE IS_PAGEVIEW  org.wikimedia.analytics.kraken.pig.PageViewFilterFunc();
-
-IMPORT 'include/load_webrequest.pig'; -- See include/load_webrequest.pig
-log_fields = LOAD_WEBREQUEST('$input');
-
-log_fields = FILTER log_fields
-    BY (    (DATE_BUCKET(timestamp) MATCHES '$date_bucket_regex')
-        AND IS_PAGEVIEW(uri, referer, user_agent, http_status, remote_addr, 
content_type, request_method)
-    );
-
-device_info_with_non_wmf = FOREACH log_fields
-    GENERATE
-        DATE_BUCKET(timestamp)      AS date_bucket:chararray,
-        FLATTEN(DCLASS(user_agent)) AS (
-            vendor:chararray,
-            model:chararray,
-            device_os:chararray,
-            device_os_version:chararray,
-            device_class:chararray,
-            browser:chararray,
-            browser_version:chararray,
-            wmf_mobile_app:chararray,
-            has_javascript:boolean,
-            display_dimensions:chararray,
-            input_device:chararray,
-            non_wmf_mobile_app:chararray
-        );
-
-device_info = FOREACH device_info_with_non_wmf
-    GENERATE
-        date_bucket,
-        vendor,
-        model,
-        device_os,
-        device_os_version,
-        device_class,
-        browser,
-        browser_version,
-        wmf_mobile_app,
-        has_javascript,
-        display_dimensions,
-        input_device
-    ;
-
-device_info_count = FOREACH (GROUP device_info BY (date_bucket, device_class, 
device_os))
-    GENERATE FLATTEN($0), COUNT($1) AS num:int;
-device_info_count = ORDER device_info_count BY date_bucket, device_class, 
device_os;
-
-STORE device_info_count INTO '$output' USING PigStorage();
diff --git a/pig/mobile_platform.pig b/pig/mobile_platform.pig
deleted file mode 100644
index e90047c..0000000
--- a/pig/mobile_platform.pig
+++ /dev/null
@@ -1,92 +0,0 @@
-REGISTER 'kraken-generic-0.0.2-SNAPSHOT-jar-with-dependencies.jar'
-REGISTER 'kraken-dclass-0.0.2-SNAPSHOT.jar'
-REGISTER 'kraken-pig-0.0.2-SNAPSHOT.jar'
-
--- Script Parameters: pass via -p param_name=param_value, ex: -p 
date_bucket_regex=2013-03-24
-%default date_bucket_format 'yyyy-MM-dd';       -- Format applied to 
timestamps for aggregation into buckets. Default: Daily.
-%default date_bucket_regex '.*';                -- Regex used to filter the 
formatted date_buckets; must match whole line. Default: no filtering.
-
-DEFINE DATE_BUCKET  
org.wikimedia.analytics.kraken.pig.ConvertDateFormat('yyyy-MM-dd\'T\'HH:mm:ss', 
'$date_bucket_format');
-/* For testing:
-DEFINE DATE_BUCKET  
org.wikimedia.analytics.kraken.pig.ConvertDateFormat('yyyy-MM-dd\'T\'HH:mm:ss', 
'yyyy-MM-dd');
-*/
-DEFINE DCLASS       org.wikimedia.analytics.kraken.pig.UserAgentClassifier();
-DEFINE IS_PAGEVIEW  org.wikimedia.analytics.kraken.pig.PageViewFilterFunc();
-
-IMPORT 'include/load_webrequest.pig';
-
-/* For testing:
-************* Unsampled *************
-log_fields = 
LOAD_WEBREQUEST('hdfs:///wmf/raw/webrequest/webrequest-wikipedia-mobile/dt=2013-01-31_*');
-log_fields = 
LOAD_WEBREQUEST('hdfs:///wmf/raw/webrequest/webrequest-wikipedia-mobile/dt=2013-01-31_22.30*');
-log_fields = 
LOAD_WEBREQUEST('hdfs:///wmf/raw/webrequest/webrequest-wikipedia-mobile/dt=2013-03-22_16.30*');
-log_fields = 
LOAD_WEBREQUEST('hdfs:///wmf/raw/webrequest/webrequest-wikipedia-mobile/dt=2013-03-25_22.30*');
-log_fields = 
LOAD_WEBREQUEST('hdfs:///wmf/raw/webrequest/webrequest-wikipedia-mobile/dt=2013-03-25_16.30*');
-log_fields = 
LOAD_WEBREQUEST('hdfs:///wmf/raw/webrequest/webrequest-wikipedia-mobile/dt=2013-04-01_16.45*,hdfs:///wmf/raw/webrequest/webrequest-wikipedia-mobile/dt=2013-04-01_17.*');
-log_fields = 
LOAD_WEBREQUEST('hdfs:///wmf/raw/webrequest/webrequest-wikipedia-mobile/dt=2013-04-01*');
-
-************* Sampled ***************
-log_fields = 
LOAD_WEBREQUEST('hdfs:///wmf/raw/webrequest/webrequest-all-sampled-1000/dt=2013-04-15_12*');
-log_fields = 
LOAD_WEBREQUEST('hdfs:///wmf/raw/webrequest/webrequest-all-sampled-1000/dt=2013-04-15*');
-log_fields = 
LOAD_WEBREQUEST('hdfs:///wmf/raw/webrequest/webrequest-all-sampled-1000/dt=2013-04*');
-
-************* Local *****************
-log_fields = LOAD_WEBREQUEST('pig.sample.webrequest.wikipedia.mobile*');
-*/
-log_fields = LOAD_WEBREQUEST('$input');
-
-log_fields = FOREACH log_fields
-    GENERATE
-        DATE_BUCKET(timestamp) as date_bucket,
-        FLATTEN(STRSPLIT(remote_addr,'\\|')) as (ip_addr:chararray, 
country_code:chararray),
-        uri, referer, user_agent, http_status, content_type, request_method
-    ;
-
-/* For testing:
-matching_log_fields = FILTER log_fields BY (
-    (date_bucket MATCHES '.*')
-    AND IS_PAGEVIEW(uri, referer, user_agent, http_status, ip_addr, 
content_type, request_method)
-);
-NOTE: the ip_addr field is anonymized.  During anonymization, internal IP 
addresses can be made external and vice versa.
-TODO: investigate more carefully whether the anonymization coupled with 
"IS_PAGEVIEW" changes the results significantly
-*/
-matching_log_fields = FILTER log_fields BY (
-    (date_bucket MATCHES '$date_bucket_regex')
-    AND IS_PAGEVIEW(uri, referer, user_agent, http_status, ip_addr, 
content_type, request_method)
-);
-
-platform_info = FOREACH matching_log_fields
-    GENERATE
-        date_bucket,
-        FLATTEN(DCLASS(REPLACE(user_agent, '%20', ' '))) AS (
-            vendor:chararray,
-            model:chararray,
-            device_os:chararray,
-            device_os_version:chararray,
-            device_class:chararray,
-            browser:chararray,
-            browser_version:chararray,
-            wmf_mobile_app:chararray,
-            has_javascript:boolean,
-            display_dimensions:chararray,
-            input_device:chararray,
-            non_wmf_mobile_app:chararray
-        )
-    ;
-official_platform_info = FILTER platform_info BY wmf_mobile_app is not null;
-official_platform_info = FOREACH official_platform_info GENERATE date_bucket, 
wmf_mobile_app;
-
-official_platform_info_group = GROUP official_platform_info BY (date_bucket, 
wmf_mobile_app);
-official_platform_info_count = FOREACH official_platform_info_group GENERATE 
FLATTEN(group), COUNT(official_platform_info) * 1000;
-ordered_official_platform_info_count = ORDER official_platform_info_count BY 
date_bucket, wmf_mobile_app;
-
-STORE ordered_official_platform_info_count INTO '$output/official' USING 
PigStorage();
-
-unofficial_platform_info = FILTER platform_info BY non_wmf_mobile_app is not 
null;
-unofficial_platform_info = FOREACH unofficial_platform_info GENERATE 
date_bucket, non_wmf_mobile_app;
-
-unofficial_platform_info_group = GROUP unofficial_platform_info BY 
(date_bucket, non_wmf_mobile_app);
-unofficial_platform_info_count = FOREACH unofficial_platform_info_group 
GENERATE FLATTEN(group), COUNT(unofficial_platform_info) * 1000;
-ordered_unofficial_platform_info_count = ORDER unofficial_platform_info_count 
BY date_bucket, non_wmf_mobile_app;
-
-STORE ordered_unofficial_platform_info_count INTO '$output/unofficial' USING 
PigStorage();

-- 
To view, visit https://gerrit.wikimedia.org/r/117321
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I13caedc40b43d01d005c299bd9e3088c1ba603c4
Gerrit-PatchSet: 1
Gerrit-Project: analytics/kraken
Gerrit-Branch: master
Gerrit-Owner: QChris <christ...@quelltextlich.at>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to