Elukey has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/357769 )

Change subject: Remove any trace of the maps cluster
......................................................................

Remove any trace of the maps cluster

The cache::maps cluster has been merged into the upload cluster;
this commit cleans up all the maps-related configuration that is
no longer needed.

Change-Id: Ie64ef1bba9b2d83e195f23c8b83b483e5c2f77d1
---
M bin/refinery-dump-status-webrequest-partitions
M oozie/webrequest/datasets.xml
M oozie/webrequest/datasets_raw.xml
M oozie/webrequest/load/bundle.xml
4 files changed, 4 insertions(+), 43 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery 
refs/changes/69/357769/1

diff --git a/bin/refinery-dump-status-webrequest-partitions 
b/bin/refinery-dump-status-webrequest-partitions
index 5603845..89e25bd 100755
--- a/bin/refinery-dump-status-webrequest-partitions
+++ b/bin/refinery-dump-status-webrequest-partitions
@@ -73,8 +73,8 @@
 }
 
 add_dataset "mediacounts" "daily" "   full  | top1000 |"
-add_dataset "raw_webrequest" "hourly" "     maps    |     misc    |     text   
 |    upload   |"
-add_dataset "webrequest" "hourly" "  maps  |  misc  |  text  | upload |"
+add_dataset "raw_webrequest" "hourly" "  misc    |     text    |    upload   |"
+add_dataset "webrequest" "hourly" "  misc  |  text  | upload |"
 add_dataset "pageview" "hourly" "  hourly  |"
 add_dataset "projectview" "hourly" "   hourly    |"
 
@@ -414,7 +414,7 @@
 
     local DATE_HDFS_PADDED="$(date --utc -d "$DATE" +'%Y/%m/%d/%H')"
 
-    for SOURCE in maps misc text upload
+    for SOURCE in misc text upload
     do
         log_no_lf "    "
         dump_dataset_raw_webrequest_partition "$DATE_HDFS_PADDED" "$SOURCE"
@@ -428,7 +428,7 @@
     local DATE_DIRS_REL="$(date --utc -d "$DATE" 
+'year=%Y/month=%m/day=%d/hour=%H')"
     DATE_DIRS_REL="${DATE_DIRS_REL//=0/=}"
 
-    for SOURCE in maps misc text upload
+    for SOURCE in misc text upload
     do
         local STATUS="X"
         
SUCCESS_FILE_ABS="$WEBREQUEST_DATA_DIR_ABS/webrequest_source=$SOURCE/$DATE_DIRS_REL/_SUCCESS"
diff --git a/oozie/webrequest/datasets.xml b/oozie/webrequest/datasets.xml
index 3925db3..ea67dc6 100644
--- a/oozie/webrequest/datasets.xml
+++ b/oozie/webrequest/datasets.xml
@@ -28,14 +28,6 @@
     at “${...}” as input for the second EL level. There, the variables hold
     their expected values, and we can start unpadding them.
     -->
-    <dataset name="webrequest_maps"
-             frequency="${coord:hours(1)}"
-             initial-instance="${start_time}"
-             timezone="Universal">
-        
<uri-template>${webrequest_data_directory}/webrequest_source=maps/year=${YEAR}/month=${"$"}{MONTH
 + 0}/day=${"$"}{DAY + 0}/hour=${"$"}{HOUR + 0}</uri-template>
-        <done-flag>_SUCCESS</done-flag>
-    </dataset>
-
     <dataset name="webrequest_misc"
              frequency="${coord:hours(1)}"
              initial-instance="${start_time}"
diff --git a/oozie/webrequest/datasets_raw.xml 
b/oozie/webrequest/datasets_raw.xml
index f8580ab..981ad2a 100644
--- a/oozie/webrequest/datasets_raw.xml
+++ b/oozie/webrequest/datasets_raw.xml
@@ -15,13 +15,6 @@
     not care if the sequence stats have been checked.  This will simply include
     any imported hourly data directories that exist.
     -->
-    <dataset name="webrequest_maps_raw_unchecked"
-             frequency="${coord:hours(1)}"
-             initial-instance="${start_time}"
-             timezone="Universal">
-        
<uri-template>${webrequest_raw_data_directory}/webrequest_maps/hourly/${YEAR}/${MONTH}/${DAY}/${HOUR}</uri-template>
-        <done-flag>_IMPORTED</done-flag>
-    </dataset>
     <dataset name="webrequest_misc_raw_unchecked"
              frequency="${coord:hours(1)}"
              initial-instance="${start_time}"
@@ -50,13 +43,6 @@
     Hive partition's existence.  This data has not been checked at all, which 
means
     there may be significant data loss or duplicate data here.
     -->
-    <dataset name="webrequest_maps_raw_partitioned"
-             frequency="${coord:hours(1)}"
-             initial-instance="${start_time}"
-             timezone="Universal">
-        
<uri-template>${webrequest_raw_data_directory}/webrequest_maps/hourly/${YEAR}/${MONTH}/${DAY}/${HOUR}</uri-template>
-        <done-flag>_PARTITIONED</done-flag>
-    </dataset>
     <dataset name="webrequest_misc_raw_partitioned"
              frequency="${coord:hours(1)}"
              initial-instance="${start_time}"
@@ -88,13 +74,6 @@
     and it has been determined that the expected number of requests
     equals the actual number of entires for this hour.
     -->
-    <dataset name="webrequest_maps_raw"
-             frequency="${coord:hours(1)}"
-             initial-instance="${start_time}"
-             timezone="Universal">
-        
<uri-template>${webrequest_raw_data_directory}/webrequest_maps/hourly/${YEAR}/${MONTH}/${DAY}/${HOUR}</uri-template>
-        <done-flag>_SUCCESS</done-flag>
-    </dataset>
     <dataset name="webrequest_misc_raw"
              frequency="${coord:hours(1)}"
              initial-instance="${start_time}"
diff --git a/oozie/webrequest/load/bundle.xml b/oozie/webrequest/load/bundle.xml
index 7298447..4f5a0b8 100644
--- a/oozie/webrequest/load/bundle.xml
+++ b/oozie/webrequest/load/bundle.xml
@@ -36,16 +36,6 @@
         <property><name>sla_alert_contact</name></property>
     </parameters>
 
-    <coordinator name="webrequest-load-coord-maps">
-        <app-path>${coordinator_file}</app-path>
-        <configuration>
-            <property>
-                <name>webrequest_source</name>
-                <value>maps</value>
-            </property>
-        </configuration>
-    </coordinator>
-
     <coordinator name="webrequest-load-coord-misc">
         <app-path>${coordinator_file}</app-path>
         <configuration>

-- 
To view, visit https://gerrit.wikimedia.org/r/357769
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie64ef1bba9b2d83e195f23c8b83b483e5c2f77d1
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Elukey <ltosc...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to