Elukey has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/357769 )
Change subject: Remove any trace of the maps cluster ...................................................................... Remove any trace of the maps cluster The cache::maps cluster has been merged to the upload one, this commit takes care of cleaning up all the maps-related configurations that are not needed anymore. Change-Id: Ie64ef1bba9b2d83e195f23c8b83b483e5c2f77d1 --- M bin/refinery-dump-status-webrequest-partitions M oozie/webrequest/datasets.xml M oozie/webrequest/datasets_raw.xml M oozie/webrequest/load/bundle.xml 4 files changed, 4 insertions(+), 43 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery refs/changes/69/357769/1 diff --git a/bin/refinery-dump-status-webrequest-partitions b/bin/refinery-dump-status-webrequest-partitions index 5603845..89e25bd 100755 --- a/bin/refinery-dump-status-webrequest-partitions +++ b/bin/refinery-dump-status-webrequest-partitions @@ -73,8 +73,8 @@ } add_dataset "mediacounts" "daily" " full | top1000 |" -add_dataset "raw_webrequest" "hourly" " maps | misc | text | upload |" -add_dataset "webrequest" "hourly" " maps | misc | text | upload |" +add_dataset "raw_webrequest" "hourly" " misc | text | upload |" +add_dataset "webrequest" "hourly" " misc | text | upload |" add_dataset "pageview" "hourly" " hourly |" add_dataset "projectview" "hourly" " hourly |" @@ -414,7 +414,7 @@ local DATE_HDFS_PADDED="$(date --utc -d "$DATE" +'%Y/%m/%d/%H')" - for SOURCE in maps misc text upload + for SOURCE in misc text upload do log_no_lf " " dump_dataset_raw_webrequest_partition "$DATE_HDFS_PADDED" "$SOURCE" @@ -428,7 +428,7 @@ local DATE_DIRS_REL="$(date --utc -d "$DATE" +'year=%Y/month=%m/day=%d/hour=%H')" DATE_DIRS_REL="${DATE_DIRS_REL//=0/=}" - for SOURCE in maps misc text upload + for SOURCE in misc text upload do local STATUS="X" SUCCESS_FILE_ABS="$WEBREQUEST_DATA_DIR_ABS/webrequest_source=$SOURCE/$DATE_DIRS_REL/_SUCCESS" diff --git a/oozie/webrequest/datasets.xml b/oozie/webrequest/datasets.xml index 3925db3..ea67dc6 100644 --- a/oozie/webrequest/datasets.xml +++ b/oozie/webrequest/datasets.xml @@ -28,14 +28,6 @@ at “${...}” as input for the second EL level. There, the variables hold their expected values, and we can start unpadding them. --> - <dataset name="webrequest_maps" - frequency="${coord:hours(1)}" - initial-instance="${start_time}" - timezone="Universal"> - <uri-template>${webrequest_data_directory}/webrequest_source=maps/year=${YEAR}/month=${"$"}{MONTH + 0}/day=${"$"}{DAY + 0}/hour=${"$"}{HOUR + 0}</uri-template> - <done-flag>_SUCCESS</done-flag> - </dataset> - <dataset name="webrequest_misc" frequency="${coord:hours(1)}" initial-instance="${start_time}" diff --git a/oozie/webrequest/datasets_raw.xml b/oozie/webrequest/datasets_raw.xml index f8580ab..981ad2a 100644 --- a/oozie/webrequest/datasets_raw.xml +++ b/oozie/webrequest/datasets_raw.xml @@ -15,13 +15,6 @@ not care if the sequence stats have been checked. This will simply include any imported hourly data directories that exist. --> - <dataset name="webrequest_maps_raw_unchecked" - frequency="${coord:hours(1)}" - initial-instance="${start_time}" - timezone="Universal"> - <uri-template>${webrequest_raw_data_directory}/webrequest_maps/hourly/${YEAR}/${MONTH}/${DAY}/${HOUR}</uri-template> - <done-flag>_IMPORTED</done-flag> - </dataset> <dataset name="webrequest_misc_raw_unchecked" frequency="${coord:hours(1)}" initial-instance="${start_time}" @@ -50,13 +43,6 @@ Hive partition's existence. This data has not been checked at all, which means there may be significant data loss or duplicate data here. --> - <dataset name="webrequest_maps_raw_partitioned" - frequency="${coord:hours(1)}" - initial-instance="${start_time}" - timezone="Universal"> - <uri-template>${webrequest_raw_data_directory}/webrequest_maps/hourly/${YEAR}/${MONTH}/${DAY}/${HOUR}</uri-template> - <done-flag>_PARTITIONED</done-flag> - </dataset> <dataset name="webrequest_misc_raw_partitioned" frequency="${coord:hours(1)}" initial-instance="${start_time}" @@ -88,13 +74,6 @@ and it has been determined that the expected number of requests equals the actual number of entires for this hour. --> - <dataset name="webrequest_maps_raw" - frequency="${coord:hours(1)}" - initial-instance="${start_time}" - timezone="Universal"> - <uri-template>${webrequest_raw_data_directory}/webrequest_maps/hourly/${YEAR}/${MONTH}/${DAY}/${HOUR}</uri-template> - <done-flag>_SUCCESS</done-flag> - </dataset> <dataset name="webrequest_misc_raw" frequency="${coord:hours(1)}" initial-instance="${start_time}" diff --git a/oozie/webrequest/load/bundle.xml b/oozie/webrequest/load/bundle.xml index 7298447..4f5a0b8 100644 --- a/oozie/webrequest/load/bundle.xml +++ b/oozie/webrequest/load/bundle.xml @@ -36,16 +36,6 @@ <property><name>sla_alert_contact</name></property> </parameters> - <coordinator name="webrequest-load-coord-maps"> - <app-path>${coordinator_file}</app-path> - <configuration> - <property> - <name>webrequest_source</name> - <value>maps</value> - </property> - </configuration> - </coordinator> - <coordinator name="webrequest-load-coord-misc"> <app-path>${coordinator_file}</app-path> <configuration> -- To view, visit https://gerrit.wikimedia.org/r/357769 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ie64ef1bba9b2d83e195f23c8b83b483e5c2f77d1 Gerrit-PatchSet: 1 Gerrit-Project: analytics/refinery Gerrit-Branch: master Gerrit-Owner: Elukey <ltosc...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits