Gergő Tisza has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/331100 )

Change subject: [WIP] Add Oozie jobs for wmf_raw.ApiAction -> wmf.action_*
......................................................................

[WIP] Add Oozie jobs for wmf_raw.ApiAction -> wmf.action_*

This is probably stupid but gotta start somewhere.

Bug: T137321
Change-Id: I7ecd640a45707e7c698c41e615962100d578aab7
---
A oozie/apiaction/README.md
A oozie/apiaction/bundle.properties
A oozie/apiaction/bundle.xml
A oozie/apiaction/coordinator.xml
A oozie/apiaction/create-action-tables.hql
A oozie/apiaction/load-action_action_hourly.hql
A oozie/apiaction/load-action_param_hourly-delimited.hql
A oozie/apiaction/load-action_param_hourly.hql
A oozie/apiaction/load-action_ua_hourly.hql
A oozie/apiaction/workflow.xml
10 files changed, 712 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery 
refs/changes/00/331100/1

diff --git a/oozie/apiaction/README.md b/oozie/apiaction/README.md
new file mode 100644
index 0000000..75787f4
--- /dev/null
+++ b/oozie/apiaction/README.md
@@ -0,0 +1,8 @@
+Oozie job to schedule generating dimensional rollup tables for action API 
metrics. The job runs every
+hour and aggregates data from wmf_raw.ApiAction into the various wmf.action_* 
tables.
+
+Example command for running the bundle on command line:
+
+    oozie job -run \
+         -config bundle.properties \
+         -D refinery_directory=hdfs://analytics-hadoop/user/tgr/refinery
diff --git a/oozie/apiaction/bundle.properties 
b/oozie/apiaction/bundle.properties
new file mode 100644
index 0000000..c45ec6b
--- /dev/null
+++ b/oozie/apiaction/bundle.properties
@@ -0,0 +1,50 @@
+# Configures a bundle to automatically manage generating Action API metrics 
from
+# the raw ApiAction data. Any of the following properties are overridable with -D.
+# Usage:
+#   oozie job -Duser=$USER -Dstart_time=2017-01-01T00:00Z -submit -config 
oozie/apiaction/bundle.properties
+#
+# NOTE:  The $oozie_directory must be synced to HDFS so that all relevant
+#        .xml files exist there when this job is submitted.
+
+name_node                         = hdfs://analytics-hadoop
+job_tracker                       = resourcemanager.analytics.eqiad.wmnet:8032
+queue_name                        = default
+
+#Default user
+user                              = hdfs
+
+# Base path in HDFS to refinery.
+# When submitting this job for production, you should override this to point directly at a deployed
+# directory name, and not the 'symbolic' 'current' directory. E.g. /wmf/refinery/2015-01-05T17.59.18Z--7bb7f07
+refinery_directory                = ${name_node}/wmf/refinery/current
+
+# HDFS path to artifacts that will be used by this job.
+# E.g. refinery-job.jar should exist here.
+artifacts_directory               = ${refinery_directory}/artifacts
+
+# Base path in HDFS to oozie files.
+# Other files will be used relative to this path.
+oozie_directory                   = ${refinery_directory}/oozie
+
+# HDFS path to bundle to run.
+bundle_file                       = ${oozie_directory}/apiaction/bundle.xml
+
+# HDFS path to coordinator to run for each aggregation type defined in bundle.xml.
+coordinator_file                  = ${oozie_directory}/apiaction/coordinator.xml
+
+# HDFS path to workflow to run.
+workflow_file                     = ${oozie_directory}/apiaction/workflow.xml
+
+# HDFS path to hive-site.xml file. workflow.xml declares hive_site_xml as a
+# required property and hands it to the hive action as its job-xml.
+# NOTE(review): path follows the ${oozie_directory}/util/ layout used for
+# send_error_email below -- confirm the file is deployed there.
+hive_site_xml                     = ${oozie_directory}/util/hive/hive-site.xml
+
+# Fully qualified name of the table the aggregates are computed from.
+# bundle.xml, coordinator.xml and workflow.xml all require this property.
+source_table                      = wmf_raw.ApiAction
+
+# Initial import time of the ApiAction dataset. FIXME
+start_time                        = 2017-01-01T00:00Z
+
+# Time to stop running this coordinator.  Year 3000 == never!
+stop_time                         = 3000-01-01T00:00Z
+
+# Workflow to send an error email
+send_error_email_workflow_file    = ${oozie_directory}/util/send_error_email/workflow.xml
+
+# Bundle to start.
+oozie.bundle.application.path     = ${bundle_file}
+oozie.use.system.libpath          = true
+oozie.action.external.stats.write = true
diff --git a/oozie/apiaction/bundle.xml b/oozie/apiaction/bundle.xml
new file mode 100644
index 0000000..4e43c2f
--- /dev/null
+++ b/oozie/apiaction/bundle.xml
@@ -0,0 +1,181 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<bundle-app xmlns="uri:oozie:bundle:0.2"
+    name="apiaction-bundle">
+
+    <parameters>
+        <!--
+        Properties that must be present in the job configuration when the
+        bundle is submitted (a property without a default value is required).
+        -->
+        <property><name>name_node</name></property>
+        <property><name>job_tracker</name></property>
+        <property><name>queue_name</name></property>
+        <property><name>coordinator_file</name></property>
+        <property><name>workflow_file</name></property>
+        <property><name>send_error_email_workflow_file</name></property>
+        <property><name>start_time</name></property>
+        <property><name>stop_time</name></property>
+        <property><name>hive_site_xml</name></property>
+        <property><name>source_table</name></property>
+
+        <!--
+        type, destination_table, action and param are intentionally not
+        listed here: every coordinator below sets them in its own
+        configuration element, so they are not part of the submission-time
+        job configuration and listing them as required would make the
+        bundle fail parameter verification.
+        -->
+    </parameters>
+
+    <!-- Hourly aggregation of requests by user agent and wiki. -->
+    <coordinator name="apiaction-coord-ua">
+        <app-path>${coordinator_file}</app-path>
+        <configuration>
+            <property>
+                <name>hive_script</name>
+                <value>load-action_ua_hourly.hql</value>
+            </property>
+            <property>
+                <name>type</name>
+                <value>ua</value>
+            </property>
+            <property>
+                <!--
+                Database-qualified: the hive script documents this parameter
+                as a fully qualified table name, and the tables are created
+                in the wmf database.
+                -->
+                <name>destination_table</name>
+                <value>wmf.action_ua_hourly</value>
+            </property>
+            <!-- unused parameters -->
+            <property>
+                <name>action</name>
+                <value></value>
+            </property>
+            <property>
+                <name>param</name>
+                <value></value>
+            </property>
+        </configuration>
+    </coordinator>
+
+    <!-- Hourly aggregation of requests by action and wiki. -->
+    <coordinator name="apiaction-coord-action">
+        <app-path>${coordinator_file}</app-path>
+        <configuration>
+            <property>
+                <name>hive_script</name>
+                <value>load-action_action_hourly.hql</value>
+            </property>
+            <property>
+                <name>type</name>
+                <value>action</value>
+            </property>
+            <property>
+                <!--
+                Database-qualified: the hive script documents this parameter
+                as a fully qualified table name, and the tables are created
+                in the wmf database.
+                -->
+                <name>destination_table</name>
+                <value>wmf.action_action_hourly</value>
+            </property>
+            <!-- unused parameters -->
+            <property>
+                <name>action</name>
+                <value></value>
+            </property>
+            <property>
+                <name>param</name>
+                <value></value>
+            </property>
+        </configuration>
+    </coordinator>
+
+    <!--
+    Hourly aggregation of the generator and flow/submodule parameter values.
+    This job passes no action/param, so it has to run the script that does
+    not read them: load-action_param_hourly.hql. The -delimited script
+    filters on ${action} and splits ${param}; with both empty it would
+    select nothing.
+    NOTE(review): the coordinator name and type are kept unchanged so the
+    job identity stays stable; consider renaming them in a follow-up since
+    this job is not the one that splits delimited values.
+    -->
+    <coordinator name="apiaction-coord-param-delimited">
+        <app-path>${coordinator_file}</app-path>
+        <configuration>
+            <property>
+                <name>hive_script</name>
+                <value>load-action_param_hourly.hql</value>
+            </property>
+            <property>
+                <name>type</name>
+                <value>param-delimited</value>
+            </property>
+            <property>
+                <!-- Database-qualified, as the hive script's parameter documentation requires. -->
+                <name>destination_table</name>
+                <value>wmf.action_param_hourly</value>
+            </property>
+            <!-- unused parameters -->
+            <property>
+                <name>action</name>
+                <value></value>
+            </property>
+            <property>
+                <name>param</name>
+                <value></value>
+            </property>
+        </configuration>
+    </coordinator>
+
+    <!--
+    Hourly aggregation of the '|'-separated values of action=query, prop=...
+    Runs the -delimited script, which is the one that reads ${action} and
+    ${param}; load-action_param_hourly.hql ignores both.
+    -->
+    <coordinator name="apiaction-coord-param-prop">
+        <app-path>${coordinator_file}</app-path>
+        <configuration>
+            <property>
+                <name>hive_script</name>
+                <value>load-action_param_hourly-delimited.hql</value>
+            </property>
+            <property>
+                <name>type</name>
+                <value>param-prop</value>
+            </property>
+            <property>
+                <!-- Database-qualified, as the hive script's parameter documentation requires. -->
+                <name>destination_table</name>
+                <value>wmf.action_param_hourly</value>
+            </property>
+            <property>
+                <name>action</name>
+                <value>query</value>
+            </property>
+            <property>
+                <name>param</name>
+                <value>prop</value>
+            </property>
+        </configuration>
+    </coordinator>
+
+    <!--
+    Hourly aggregation of the '|'-separated values of action=query, list=...
+    Runs the -delimited script, which is the one that reads ${action} and
+    ${param}; load-action_param_hourly.hql ignores both.
+    -->
+    <coordinator name="apiaction-coord-param-list">
+        <app-path>${coordinator_file}</app-path>
+        <configuration>
+            <property>
+                <name>hive_script</name>
+                <value>load-action_param_hourly-delimited.hql</value>
+            </property>
+            <property>
+                <name>type</name>
+                <value>param-list</value>
+            </property>
+            <property>
+                <!-- Database-qualified, as the hive script's parameter documentation requires. -->
+                <name>destination_table</name>
+                <value>wmf.action_param_hourly</value>
+            </property>
+            <property>
+                <name>action</name>
+                <value>query</value>
+            </property>
+            <property>
+                <name>param</name>
+                <value>list</value>
+            </property>
+        </configuration>
+    </coordinator>
+
+    <!--
+    Hourly aggregation of the '|'-separated values of action=query, meta=...
+    Runs the -delimited script, which is the one that reads ${action} and
+    ${param}; load-action_param_hourly.hql ignores both.
+    -->
+    <coordinator name="apiaction-coord-param-meta">
+        <app-path>${coordinator_file}</app-path>
+        <configuration>
+            <property>
+                <name>hive_script</name>
+                <value>load-action_param_hourly-delimited.hql</value>
+            </property>
+            <property>
+                <name>type</name>
+                <value>param-meta</value>
+            </property>
+            <property>
+                <!-- Database-qualified, as the hive script's parameter documentation requires. -->
+                <name>destination_table</name>
+                <value>wmf.action_param_hourly</value>
+            </property>
+            <property>
+                <name>action</name>
+                <value>query</value>
+            </property>
+            <property>
+                <name>param</name>
+                <value>meta</value>
+            </property>
+        </configuration>
+    </coordinator>
+
+</bundle-app>
diff --git a/oozie/apiaction/coordinator.xml b/oozie/apiaction/coordinator.xml
new file mode 100644
index 0000000..4e1f693
--- /dev/null
+++ b/oozie/apiaction/coordinator.xml
@@ -0,0 +1,69 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<coordinator-app xmlns="uri:oozie:coordinator:0.4"
+    name="apiaction-coord-${type}"
+    frequency="${coord:hours(1)}"
+    start="${start_time}"
+    end="${stop_time}"
+    timezone="Universal">
+
+    <parameters>
+
+        <!-- Required properties. -->
+        <property><name>name_node</name></property>
+        <property><name>job_tracker</name></property>
+        <property><name>queue_name</name></property>
+
+        <property><name>workflow_file</name></property>
+        <property><name>send_error_email_workflow_file</name></property>
+
+        <property><name>start_time</name></property>
+        <property><name>stop_time</name></property>
+
+        <property><name>type</name></property>
+        <property><name>source_table</name></property>
+        <property><name>destination_table</name></property>
+        <property><name>action</name></property>
+        <property><name>param</name></property>
+    </parameters>
+
+    <controls>
+        <!--
+        By having materialized jobs not timeout, we ease backfilling incidents
+        after recoverable hiccups on the dataset producers.
+        NOTE(review): this coordinator declares no input datasets, so its
+        actions have nothing to wait on; confirm whether a dataset
+        dependency on the source table is still to be added.
+        -->
+        <timeout>-1</timeout>
+
+        <!--
+        This template is used for six different jobs (see bundle.xml).
+        NOTE(review): concurrency bounds the simultaneously running actions
+        of one coordinator job, so it applies to each of the six jobs
+        separately; confirm that 6 per job is what is intended.
+        -->
+        <concurrency>6</concurrency>
+
+        <!-- Upper bound on materialized actions that may be waiting to run. -->
+        <throttle>12</throttle>
+
+    </controls>
+
+    <action>
+        <workflow>
+            <app-path>${workflow_file}</app-path>
+            <configuration>
+
+                <property>
+                    <name>year</name>
+                    <value>${coord:formatTime(coord:nominalTime(), 
"y")}</value>
+                </property>
+                <property>
+                    <name>month</name>
+                    <value>${coord:formatTime(coord:nominalTime(), 
"M")}</value>
+                </property>
+                <property>
+                    <name>day</name>
+                    <value>${coord:formatTime(coord:nominalTime(), 
"d")}</value>
+                </property>
+                <property>
+                    <name>hour</name>
+                    <value>${coord:formatTime(coord:nominalTime(), 
"H")}</value>
+                </property>
+            </configuration>
+        </workflow>
+    </action>
+</coordinator-app>
\ No newline at end of file
diff --git a/oozie/apiaction/create-action-tables.hql 
b/oozie/apiaction/create-action-tables.hql
new file mode 100644
index 0000000..b8fa4c5
--- /dev/null
+++ b/oozie/apiaction/create-action-tables.hql
@@ -0,0 +1,64 @@
+-- Create tables for Action API stats
+--
+-- Usage:
+--     hive -f create-action-tables.hql --database wmf
+
+-- Hourly request counts per (user agent, wiki, network origin).
+-- Declared EXTERNAL like action_action_hourly and action_param_hourly below,
+-- so that all three tables behave the same way when dropped.
+-- NOTE(review): none of the tables sets a LOCATION; confirm where the data
+-- is meant to live.
+CREATE EXTERNAL TABLE IF NOT EXISTS action_ua_hourly (
+  userAgent STRING COMMENT 'Raw user-agent',
+  wiki      STRING COMMENT 'Target wiki (e.g. enwiki)',
+  ipClass   STRING COMMENT 'IP based origin, can be wikimedia, wikimedia_labs or internet',
+  viewCount BIGINT COMMENT 'Number of requests'
+)
+COMMENT 'Hourly summary of Action API requests bucketed by user-agent and wiki'
+PARTITIONED BY (
+  year      INT COMMENT 'Unpadded year of request',
+  month     INT COMMENT 'Unpadded month of request',
+  day       INT COMMENT 'Unpadded day of request',
+  hour      INT COMMENT 'Unpadded hour of request'
+)
+STORED AS PARQUET;
+
+
+CREATE EXTERNAL TABLE IF NOT EXISTS action_action_hourly (
+  action    STRING COMMENT 'Action parameter value',
+  wiki      STRING COMMENT 'Target wiki (e.g. enwiki)',
+  ipClass   STRING COMMENT 'IP based origin, can be wikimedia, wikimedia_labs 
or internet',
+  viewCount BIGINT COMMENT 'Number of requests'
+)
+COMMENT 'Hourly summary of Action API requests bucketed by action and wiki'
+PARTITIONED BY (
+  year      INT COMMENT 'Unpadded year of request',
+  month     INT COMMENT 'Unpadded month of request',
+  day       INT COMMENT 'Unpadded day of request',
+  hour      INT COMMENT 'Unpadded hour of request'
+)
+STORED AS PARQUET;
+
+
+CREATE EXTERNAL TABLE IF NOT EXISTS action_param_hourly (
+  action    STRING COMMENT 'Action parameter value',
+  param     STRING COMMENT 'Parameter name, can be prop, list, meta, 
generator, etc',
+  value     STRING COMMENT 'Parameter value',
+  wiki      STRING COMMENT 'Target wiki (e.g. enwiki)',
+  ipClass   STRING COMMENT 'IP based origin, can be wikimedia, wikimedia_labs 
or internet',
+  viewCount BIGINT COMMENT 'Number of requests'
+)
+COMMENT 'Hourly summary of Action API requests bucketed by action, parameter, 
value and wiki'
+PARTITIONED BY (
+  year      INT COMMENT 'Unpadded year of request',
+  month     INT COMMENT 'Unpadded month of request',
+  day       INT COMMENT 'Unpadded day of request',
+  hour      INT COMMENT 'Unpadded hour of request'
+)
+STORED AS PARQUET;
+
+-- NOTE: there are many params we do not want to count distinct values of
+-- at all (eg maxlag, smaxage, maxage, requestid, origin, centralauthtoken,
+-- titles, pageids). Rather than trying to make an extensive blacklist and
+-- potentially allow new parameters to slip through which have high
+-- cardinality or sensitive information, the ETL process will use a whitelist
+-- approach to count params that have been deemed to be useful.
+--
+-- The initial whitelist is (query, prop), (query, list), (query, meta),
+-- (flow, module), (*, generator). The prop, list and meta parameters will
+-- additionally be split on '|' with each component counted separately.
diff --git a/oozie/apiaction/load-action_action_hourly.hql 
b/oozie/apiaction/load-action_action_hourly.hql
new file mode 100644
index 0000000..dfcf1bf
--- /dev/null
+++ b/oozie/apiaction/load-action_action_hourly.hql
@@ -0,0 +1,41 @@
+-- Compute hourly summary of Action API requests bucketed by action and wiki
+--
+-- Parameters:
+--     source_table      -- fully qualified table name to compute the 
aggregation from
+--     destination_table -- fully qualified table name to fill with aggregates
+--     year              -- year of partition to query
+--     month             -- month of partition to query
+--     day               -- day of partition to query
+--     hour              -- hour of partition to query
+--
+-- Usage:
+-- hive --hiveconf hive.aux.jars.path= \
+--      -f load-action_action_hourly.hql \
+--      -d source_table=wmf_raw.ApiAction \
+--      -d destination_table=wmf.action_action_hourly \
+--      -d year=2015 \
+--      -d month=11 \
+--      -d day=1 \
+--      -d hour=0
+--
+
+ADD JAR /srv/deployment/analytics/refinery/artifacts/refinery-hive.jar;
+CREATE TEMPORARY FUNCTION network_origin AS 
'org.wikimedia.analytics.refinery.hive.NetworkOriginUDF';
+
+-- Count requests of one hourly source partition per (action, wiki, network
+-- origin) and store them in the matching destination partition.
+-- OVERWRITE instead of INTO: this statement produces the complete content of
+-- the partition, so replacing it makes a retried or re-run workflow
+-- idempotent instead of appending every row a second time.
+-- Requests without an action parameter are counted as 'help'.
+INSERT OVERWRITE TABLE ${destination_table}
+PARTITION(year=${year}, month=${month}, day=${day}, hour=${hour})
+SELECT
+    COALESCE(params['action'], 'help') action,
+    wiki,
+    network_origin(ip) ipClass,
+    COUNT(1) viewCount
+FROM ${source_table}
+WHERE year = ${year}
+  AND month = ${month}
+  AND day = ${day}
+  AND hour = ${hour}
+GROUP BY
+    COALESCE(params['action'], 'help'),
+    wiki,
+    network_origin(ip)
+;
diff --git a/oozie/apiaction/load-action_param_hourly-delimited.hql 
b/oozie/apiaction/load-action_param_hourly-delimited.hql
new file mode 100644
index 0000000..90ca4ea
--- /dev/null
+++ b/oozie/apiaction/load-action_param_hourly-delimited.hql
@@ -0,0 +1,53 @@
+-- Compute hourly summary of Action API requests bucketed by parameter value 
and wiki,
+-- for a given action and parameter name
+--
+-- Parameters:
+--     source_table      -- fully qualified table name to compute the 
aggregation from
+--     destination_table -- fully qualified table name to fill with aggregates
+--     action            -- action to restrict query to
+--     param             -- parameter to split and load
+--     year              -- year of partition to query
+--     month             -- month of partition to query
+--     day               -- day of partition to query
+--     hour              -- hour of partition to query
+--
+-- Usage:
+-- hive --hiveconf hive.aux.jars.path= \
+--      -f load-action_param_hourly-delimited.hql \
+--      -d source_table=wmf_raw.ApiAction \
+--      -d destination_table=wmf.action_param_hourly \
+--      -d action=query \
+--      -d param=prop \
+--      -d year=2016 \
+--      -d month=4 \
+--      -d day=1 \
+--      -d hour=0
+--
+
+ADD JAR /srv/deployment/analytics/refinery/artifacts/refinery-hive.jar;
+CREATE TEMPORARY FUNCTION network_origin AS 
'org.wikimedia.analytics.refinery.hive.NetworkOriginUDF';
+
+-- For error-free requests of the given action in one hourly partition, split
+-- the value of the given parameter on '|' and count requests per
+-- (action, component, wiki, network origin).
+-- Rows are appended (INSERT INTO), so running this statement twice for the
+-- same hour stores the counts twice.
+INSERT INTO TABLE ${destination_table}
+PARTITION (year = ${year}, month = ${month}, day = ${day}, hour = ${hour})
+SELECT
+  params['action']   AS action,
+  '${param}'         AS param,
+  param_value        AS value,
+  wiki,
+  network_origin(ip) AS ipClass,
+  COUNT(1)           AS viewCount
+FROM ${source_table}
+  -- one output row per '|'-separated component of the parameter value
+  LATERAL VIEW EXPLODE(SPLIT(params['${param}'], '\\|')) param_values AS param_value
+WHERE
+      params['action'] = '${action}'
+  AND hadError = false
+  AND year  = ${year}
+  AND month = ${month}
+  AND day   = ${day}
+  AND hour  = ${hour}
+GROUP BY
+  params['action'], param_value, wiki, network_origin(ip)
+;
diff --git a/oozie/apiaction/load-action_param_hourly.hql 
b/oozie/apiaction/load-action_param_hourly.hql
new file mode 100644
index 0000000..4ee728d
--- /dev/null
+++ b/oozie/apiaction/load-action_param_hourly.hql
@@ -0,0 +1,53 @@
+-- Compute hourly summary of Action API requests bucketed by parameter value 
and wiki,
+-- for the query/generator and flow/submodule parameters
+--
+-- Parameters:
+--     source_table      -- fully qualified table name to compute the 
aggregation from
+--     destination_table -- fully qualified table name to fill with aggregates
+--     year              -- year of partition to query
+--     month             -- month of partition to query
+--     day               -- day of partition to query
+--     hour              -- hour of partition to query
+--
+-- Usage:
+-- hive --hiveconf hive.aux.jars.path= \
+--      -f load-action_param_hourly.hql \
+--      -d source_table=wmf_raw.ApiAction \
+--      -d destination_table=wmf.action_param_hourly \
+--      -d year=2015 \
+--      -d month=11 \
+--      -d day=1 \
+--      -d hour=0
+--
+
+ADD JAR /srv/deployment/analytics/refinery/artifacts/refinery-hive.jar;
+CREATE TEMPORARY FUNCTION network_origin AS 
'org.wikimedia.analytics.refinery.hive.NetworkOriginUDF';
+
+-- For error-free requests in one hourly partition, turn every request
+-- parameter into its own row and keep only 'generator' (any action) and
+-- 'submodule' (action=flow); count requests per
+-- (action, parameter, value, wiki, network origin).
+-- Requests without an action parameter are counted as 'help'.
+-- Rows are appended (INSERT INTO), so running this statement twice for the
+-- same hour stores the counts twice.
+INSERT INTO TABLE ${destination_table}
+PARTITION (year = ${year}, month = ${month}, day = ${day}, hour = ${hour})
+SELECT
+  COALESCE(params['action'], 'help') AS action,
+  kv.pname                           AS param,
+  kv.pvalue                          AS value,
+  wiki,
+  network_origin(ip)                 AS ipClass,
+  COUNT(1)                           AS viewCount
+FROM ${source_table}
+  LATERAL VIEW EXPLODE(params) kv AS pname, pvalue
+WHERE
+      (
+        kv.pname = 'generator'
+        OR (kv.pname = 'submodule' AND params['action'] = 'flow')
+      )
+  AND hadError = false
+  AND year  = ${year}
+  AND month = ${month}
+  AND day   = ${day}
+  AND hour  = ${hour}
+GROUP BY
+  COALESCE(params['action'], 'help'), kv.pname, kv.pvalue, wiki, network_origin(ip)
+;
diff --git a/oozie/apiaction/load-action_ua_hourly.hql 
b/oozie/apiaction/load-action_ua_hourly.hql
new file mode 100644
index 0000000..3bc96a4
--- /dev/null
+++ b/oozie/apiaction/load-action_ua_hourly.hql
@@ -0,0 +1,41 @@
+-- Compute hourly summary of Action API requests bucketed by user-agent and 
wiki
+--
+-- Parameters:
+--     source_table      -- fully qualified table name to compute the 
aggregation from
+--     destination_table -- fully qualified table name to fill with aggregates
+--     year              -- year of partition to query
+--     month             -- month of partition to query
+--     day               -- day of partition to query
+--     hour              -- hour of partition to query
+--
+-- Usage:
+-- hive --hiveconf hive.aux.jars.path= \
+--      -f load-action_ua_hourly.hql \
+--      -d source_table=wmf_raw.ApiAction \
+--      -d destination_table=wmf.action_ua_hourly \
+--      -d year=2015 \
+--      -d month=11 \
+--      -d day=1 \
+--      -d hour=0
+--
+
+ADD JAR /srv/deployment/analytics/refinery/artifacts/refinery-hive.jar;
+CREATE TEMPORARY FUNCTION network_origin AS 
'org.wikimedia.analytics.refinery.hive.NetworkOriginUDF';
+
+-- Count requests of one hourly source partition per (user agent, wiki,
+-- network origin) and store them in the matching destination partition.
+-- OVERWRITE instead of INTO: this statement produces the complete content of
+-- the partition, so replacing it makes a retried or re-run workflow
+-- idempotent instead of appending every row a second time.
+-- Columns are aliased to the destination column names, matching the other
+-- load-action_*.hql scripts.
+INSERT OVERWRITE TABLE ${destination_table}
+PARTITION(year=${year}, month=${month}, day=${day}, hour=${hour})
+SELECT
+    userAgent,
+    wiki,
+    network_origin(ip) ipClass,
+    COUNT(1) viewCount
+FROM ${source_table}
+WHERE year = ${year}
+  AND month = ${month}
+  AND day = ${day}
+  AND hour = ${hour}
+GROUP BY
+    userAgent,
+    wiki,
+    network_origin(ip)
+;
diff --git a/oozie/apiaction/workflow.xml b/oozie/apiaction/workflow.xml
new file mode 100644
index 0000000..76b19d8
--- /dev/null
+++ b/oozie/apiaction/workflow.xml
@@ -0,0 +1,152 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<workflow-app xmlns="uri:oozie:workflow:0.4"
+    name="apiaction-wf-${type}-${year}-${month}-${day}-${hour}">
+
+    <parameters>
+
+        <!-- Default values for inner oozie settings -->
+        <property>
+            <name>oozie_launcher_queue_name</name>
+            <value>${queue_name}</value>
+        </property>
+        <property>
+            <name>oozie_launcher_memory</name>
+            <value>256</value>
+        </property>
+
+        <!-- Required properties -->
+        <property><name>name_node</name></property>
+        <property><name>job_tracker</name></property>
+        <property><name>queue_name</name></property>
+
+        <property>
+            <name>hive_site_xml</name>
+            <description>hive-site.xml file path in HDFS</description>
+        </property>
+        <property>
+            <name>hive_script</name>
+            <!-- This is relative to the containing directory of this file. -->
+            <description>Hive script to run.</description>
+        </property>
+        <property>
+            <name>send_error_email_workflow_file</name>
+            <description>Workflow for sending an email</description>
+        </property>
+        <property>
+            <name>type</name>
+            <description>The type of the job (describes what kind of aggregate 
data is being produced)</description>
+        </property>
+
+        <!-- Script properties -->
+        <property>
+            <name>source_table</name>
+            <description>The table to load data from (ApiAction)</description>
+        </property>
+        <property>
+            <name>destination_table</name>
+            <description>The table to aggregate data into 
(action_*)</description>
+        </property>
+        <property>
+            <!-- Some jobs will ignore this -->
+            <name>action</name>
+            <description>The API action to aggregate data for</description>
+        </property>
+        <property>
+            <!-- Some jobs will ignore this -->
+            <name>param</name>
+            <description>The API parameter to aggregate data for</description>
+        </property>
+        <property>
+            <name>year</name>
+            <description>The partition's year</description>
+        </property>
+        <property>
+            <name>month</name>
+            <description>The partition's month</description>
+        </property>
+        <property>
+            <name>day</name>
+            <description>The partition's day</description>
+        </property>
+        <property>
+            <name>hour</name>
+            <description>The partition's hour</description>
+        </property>
+    </parameters>
+
+    <start to="generate_apiaction_metrics"/>
+
+    <!--
+    Runs the hive script chosen by the coordinator (hive_script), passing the
+    source and destination tables, the action/param filter and the hour
+    partition as script parameters.
+    -->
+    <action name="generate_apiaction_metrics">
+        <hive xmlns="uri:oozie:hive-action:0.2">
+            <job-tracker>${job_tracker}</job-tracker>
+            <name-node>${name_node}</name-node>
+            <job-xml>${hive_site_xml}</job-xml>
+            <configuration>
+                <property>
+                    <name>mapreduce.job.queuename</name>
+                    <value>${queue_name}</value>
+                </property>
+                <!--make sure oozie:launcher runs in a low priority queue -->
+                <property>
+                    <name>oozie.launcher.mapred.job.queue.name</name>
+                    <value>${oozie_launcher_queue_name}</value>
+                </property>
+                <property>
+                    <name>oozie.launcher.mapreduce.map.memory.mb</name>
+                    <value>${oozie_launcher_memory}</value>
+                </property>
+                <property>
+                    <name>hive.exec.scratchdir</name>
+                    <value>/tmp/hive-${user}</value>
+                </property>
+            </configuration>
+            <script>${hive_script}</script>
+            <param>source_table=${source_table}</param>
+            <param>destination_table=${destination_table}</param>
+            <param>action=${action}</param>
+            <param>param=${param}</param>
+            <param>year=${year}</param>
+            <param>month=${month}</param>
+            <param>day=${day}</param>
+            <param>hour=${hour}</param>
+        </hive>
+        <ok to="end" />
+        <!--
+        Failures go through the send_error_email action, which itself
+        transitions to kill afterwards. Pointing this at kill directly would
+        leave send_error_email unreachable and no alert would be sent.
+        -->
+        <error to="send_error_email" />
+    </action>
+
+    <action name="send_error_email">
+        <sub-workflow>
+            <app-path>${send_error_email_workflow_file}</app-path>
+            <propagate-configuration/>
+            <configuration>
+                <property>
+                    <name>parent_name</name>
+                    <value>${wf:name()}</value>
+                </property>
+                <property>
+                    <name>parent_failed_action</name>
+                    <value>${wf:lastErrorNode()}</value>
+                </property>
+                <property>
+                    <name>parent_error_code</name>
+                    <value>${wf:errorCode(wf:lastErrorNode())}</value>
+                </property>
+                <property>
+                    <name>parent_error_message</name>
+                    <value>${wf:errorMessage(wf:lastErrorNode())}</value>
+                </property>
+                <property>
+                    <name>to</name>
+                    
<value>analytics-ale...@wikimedia.org,gti...@wikimedia.org,bd...@wikimedia.org</value>
+                </property>
+            </configuration>
+        </sub-workflow>
+        <ok to="kill"/>
+        <error to="kill"/>
+    </action>
+
+    <kill name="kill">
+        <message>Action failed, error 
message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+    <end name="end"/>
+</workflow-app>

-- 
To view, visit https://gerrit.wikimedia.org/r/331100
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7ecd640a45707e7c698c41e615962100d578aab7
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Gergő Tisza <gti...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to