Ottomata has submitted this change and it was merged.

Change subject: Update oozie load job to use _IMPORTED flag
......................................................................


Update oozie load job to use _IMPORTED flag

Update the datasets file to use _IMPORTED as done flag.
Remove input dependency in coordinator to prevent the two hours wait.
Remove already set properties in coordinator.

Change-Id: I0b8bb9d40cc0d70836da87ad0164aaadaa4a80df
---
M oozie/webrequest/datasets_raw.xml
M oozie/webrequest/load/coordinator.xml
2 files changed, 5 insertions(+), 42 deletions(-)

Approvals:
  Ottomata: Verified; Looks good to me, approved



diff --git a/oozie/webrequest/datasets_raw.xml b/oozie/webrequest/datasets_raw.xml
index ac4e3be..bc9e872 100644
--- a/oozie/webrequest/datasets_raw.xml
+++ b/oozie/webrequest/datasets_raw.xml
@@ -20,35 +20,35 @@
              initial-instance="${start_time}"
              timezone="Universal">
         <uri-template>${webrequest_raw_data_directory}/webrequest_maps/hourly/${YEAR}/${MONTH}/${DAY}/${HOUR}</uri-template>
-        <done-flag></done-flag>
+        <done-flag>_IMPORTED</done-flag>
     </dataset>
     <dataset name="webrequest_misc_raw_unchecked"
              frequency="${coord:hours(1)}"
              initial-instance="${start_time}"
              timezone="Universal">
         <uri-template>${webrequest_raw_data_directory}/webrequest_misc/hourly/${YEAR}/${MONTH}/${DAY}/${HOUR}</uri-template>
-        <done-flag></done-flag>
+        <done-flag>_IMPORTED</done-flag>
     </dataset>
     <dataset name="webrequest_mobile_raw_unchecked"
              frequency="${coord:hours(1)}"
              initial-instance="${start_time}"
              timezone="Universal">
         <uri-template>${webrequest_raw_data_directory}/webrequest_mobile/hourly/${YEAR}/${MONTH}/${DAY}/${HOUR}</uri-template>
-        <done-flag></done-flag>
+        <done-flag>_IMPORTED</done-flag>
     </dataset>
     <dataset name="webrequest_text_raw_unchecked"
              frequency="${coord:hours(1)}"
              initial-instance="${start_time}"
              timezone="Universal">
         <uri-template>${webrequest_raw_data_directory}/webrequest_text/hourly/${YEAR}/${MONTH}/${DAY}/${HOUR}</uri-template>
-        <done-flag></done-flag>
+        <done-flag>_IMPORTED</done-flag>
     </dataset>
     <dataset name="webrequest_upload_raw_unchecked"
              frequency="${coord:hours(1)}"
              initial-instance="${start_time}"
              timezone="Universal">
         <uri-template>${webrequest_raw_data_directory}/webrequest_upload/hourly/${YEAR}/${MONTH}/${DAY}/${HOUR}</uri-template>
-        <done-flag></done-flag>
+        <done-flag>_IMPORTED</done-flag>
     </dataset>
 
     <!--
diff --git a/oozie/webrequest/load/coordinator.xml b/oozie/webrequest/load/coordinator.xml
index a5e0694..71fb41f 100644
--- a/oozie/webrequest/load/coordinator.xml
+++ b/oozie/webrequest/load/coordinator.xml
@@ -81,40 +81,12 @@
         <data-in name="input" dataset="webrequest_${webrequest_source}_raw_unchecked">
             <instance>${coord:current(0)}</instance>
         </data-in>
-        <!--
-        In order to prevent the job from running too early, we wait
-        for the dataset that is 2 hours in the future (1 hour is too
-        little, as the directory for the dataset 1 hour in the future
-        might get created /before/ writing for the current dataset
-        finishes).
-         -->
-        <data-in name="ready_indicator" dataset="webrequest_${webrequest_source}_raw_unchecked">
-            <instance>${coord:current(2)}</instance>
-        </data-in>
     </input-events>
 
     <action>
         <workflow>
             <app-path>${workflow_file}</app-path>
             <configuration>
-
-                <!-- Pass these properties through to the workflow -->
-                <property><name>name_node</name><value>${name_node}</value></property>
-                <property><name>job_tracker</name><value>${job_tracker}</value></property>
-                <property><name>queue_name</name><value>${queue_name}</value></property>
-
-                <property>
-                    <name>add_partition_workflow_file</name>
-                    <value>${add_partition_workflow_file}</value>
-                </property>
-                <property>
-                    <name>hive_site_xml</name>
-                    <value>${hive_site_xml}</value>
-                </property>
-                <property>
-                    <name>table</name>
-                    <value>${table}</value>
-                </property>
                 <property>
                     <name>year</name>
                     <value>${coord:formatTime(coord:nominalTime(), "y")}</value>
@@ -135,15 +107,6 @@
                     <name>location</name>
                     <value>${coord:dataIn('input')}</value>
                 </property>
-                <property>
-                    <name>statistics_table</name>
-                    <value>${statistics_table}</value>
-                </property>
-                <property>
-                    <name>faulty_hosts_directory</name>
-                    <value>${faulty_hosts_directory}</value>
-                </property>
-
             </configuration>
         </workflow>
     </action>

-- 
To view, visit https://gerrit.wikimedia.org/r/249373
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I0b8bb9d40cc0d70836da87ad0164aaadaa4a80df
Gerrit-PatchSet: 2
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Joal <j...@wikimedia.org>
Gerrit-Reviewer: Nuria <nu...@wikimedia.org>
Gerrit-Reviewer: Ottomata <o...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to