Joal has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/360327 )

Change subject: Rename unique devices project-wide
......................................................................

Rename unique devices project-wide

We have reconsidered the name of the project-wide unique devices
dataset, and think it will be better to name it per_project_class,
where a project_class is, for instance, wikipedia or wikivoyage.

Change-Id: If5298a9ce5ae03483b52fd6fb251a91fadf80abe
---
R 
hive/unique_devices/per_project_class/create_unique_devices_per_project_class_daily_table.hql
R 
hive/unique_devices/per_project_class/create_unique_devices_per_project_class_monthly_table.hql
M oozie/unique_devices/README.md
M oozie/unique_devices/datasets.xml
R oozie/unique_devices/per_project_class/daily/coordinator.properties
R oozie/unique_devices/per_project_class/daily/coordinator.xml
R 
oozie/unique_devices/per_project_class/daily/unique_devices_per_project_class_daily.hql
R 
oozie/unique_devices/per_project_class/daily/unique_devices_per_project_class_daily_to_archive.hql
R oozie/unique_devices/per_project_class/daily/workflow.xml
R oozie/unique_devices/per_project_class/druid/daily/coordinator.properties
R oozie/unique_devices/per_project_class/druid/daily/coordinator.xml
A 
oozie/unique_devices/per_project_class/druid/daily/generate_druid_unique_devices_per_project_class_daily.hql
R 
oozie/unique_devices/per_project_class/druid/daily/load_unique_devices_per_project_class_daily.json.template
R oozie/unique_devices/per_project_class/druid/daily/workflow.xml
R oozie/unique_devices/per_project_class/monthly/coordinator.properties
R oozie/unique_devices/per_project_class/monthly/coordinator.xml
R 
oozie/unique_devices/per_project_class/monthly/unique_devices_per_project_class_monthly.hql
R 
oozie/unique_devices/per_project_class/monthly/unique_devices_per_project_class_monthly_to_archive.hql
R oozie/unique_devices/per_project_class/monthly/workflow.xml
D 
oozie/unique_devices/project_wide/druid/daily/generate_druid_unique_devices_project_wide_daily.hql
20 files changed, 204 insertions(+), 204 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery 
refs/changes/27/360327/1

diff --git 
a/hive/unique_devices/project_wide/create_unique_devices_project_wide_daily_table.hql
 
b/hive/unique_devices/per_project_class/create_unique_devices_per_project_class_daily_table.hql
similarity index 68%
rename from 
hive/unique_devices/project_wide/create_unique_devices_project_wide_daily_table.hql
rename to 
hive/unique_devices/per_project_class/create_unique_devices_per_project_class_daily_table.hql
index ece0e2e..3440f69 100644
--- 
a/hive/unique_devices/project_wide/create_unique_devices_project_wide_daily_table.hql
+++ 
b/hive/unique_devices/per_project_class/create_unique_devices_per_project_class_daily_table.hql
@@ -1,14 +1,14 @@
--- Creates table statement for unique devices project-wide daily table.
+-- Creates table statement for unique devices per project-class daily table.
 --
 -- Parameters:
 --     <none>
 --
 -- Usage
---     hive -f create_unique_devices_project_wide_daily_table.hql --database 
wmf
+--     hive -f create_unique_devices_per_project_class_daily_table.hql 
--database wmf
 
 
-CREATE EXTERNAL TABLE IF NOT EXISTS `unique_devices_project_wide_daily`(
-    `project`              string  COMMENT 'The lower cased project (wikipedia 
for instance)',
+CREATE EXTERNAL TABLE IF NOT EXISTS `unique_devices_per_project_class_daily`(
+    `project_class`        string  COMMENT 'The lower cased project-class 
(wikipedia for instance)',
     `country`              string  COMMENT 'Country name of the accessing 
agents (computed using maxmind GeoIP database)',
     `country_code`         string  COMMENT '2 letter country code',
     `uniques_underestimate` int    COMMENT 'Under estimation of unique devices 
seen based on last-access-global cookie, and the nocookies header',
@@ -21,5 +21,5 @@
     `day`                   int    COMMENT 'Unpadded day of requests'
 )
 STORED AS PARQUET
-LOCATION '/wmf/data/wmf/unique_devices/project_wide/daily'
+LOCATION '/wmf/data/wmf/unique_devices/per_project_class/daily'
 ;
diff --git 
a/hive/unique_devices/project_wide/create_unique_devices_project_wide_monthly_table.hql
 
b/hive/unique_devices/per_project_class/create_unique_devices_per_project_class_monthly_table.hql
similarity index 72%
rename from 
hive/unique_devices/project_wide/create_unique_devices_project_wide_monthly_table.hql
rename to 
hive/unique_devices/per_project_class/create_unique_devices_per_project_class_monthly_table.hql
index 74251b7..df42ab9 100644
--- 
a/hive/unique_devices/project_wide/create_unique_devices_project_wide_monthly_table.hql
+++ 
b/hive/unique_devices/per_project_class/create_unique_devices_per_project_class_monthly_table.hql
@@ -4,11 +4,11 @@
 --     <none>
 --
 -- Usage
---     hive -f create_unique_devices_project_wide_monthly_table.hql --database 
wmf
+--     hive -f create_unique_devices_per_project_class_monthly_table.hql 
--database wmf
 
 
-CREATE EXTERNAL TABLE IF NOT EXISTS `unique_devices_project_wide_monthly`(
-    `project`              string  COMMENT 'The lower cased project (wikipedia 
for instance)',
+CREATE EXTERNAL TABLE IF NOT EXISTS `unique_devices_per_project_class_monthly`(
+    `project_class`        string  COMMENT 'The lower cased project-class 
(wikipedia for instance)',
     `country`              string  COMMENT 'Country name of the accessing 
agents (computed using maxmind GeoIP database)',
     `country_code`         string  COMMENT '2 letter country code',
     `uniques_underestimate` int    COMMENT 'Under estimation of unique devices 
seen based on last-access-global cookie, and the nocookies header',
@@ -20,5 +20,5 @@
     `month`                 int    COMMENT 'Unpadded month of requests'
 )
 STORED AS PARQUET
-LOCATION '/wmf/data/wmf/unique_devices/project_wide/monthly'
+LOCATION '/wmf/data/wmf/unique_devices/per_project_class/monthly'
 ;
diff --git a/oozie/unique_devices/README.md b/oozie/unique_devices/README.md
index 2ced214..04cefcb 100644
--- a/oozie/unique_devices/README.md
+++ b/oozie/unique_devices/README.md
@@ -1,9 +1,9 @@
-Contains jobs computing per-domain and project-wide unique devices
+Contains jobs computing per-domain and per-project-class unique devices
 based on last-access cookies.
 Also contains jobs to load druid with hive-computed values.
 
 
-Project-wide jobs are currently launched as:
+Per-project-class jobs are currently launched as:
 
 ```
 # Daily computation
@@ -13,7 +13,7 @@
   -Doozie_launcher_queue_name=production \
   -Doozie_launcher_memory=2048 \
   -Dstart_time=2015-12-01T00:00Z \
-  -config project_wide/daily/coordinator.properties \
+  -config per_project_class/daily/coordinator.properties \
   -run
 
 # Monthly computation
@@ -23,7 +23,7 @@
   -Doozie_launcher_queue_name=production \
   -Doozie_launcher_memory=2048 \
   -Dstart_time=2015-12-01T00:00Z \
-  -config project_wide/monthly/coordinator.properties \
+  -config per_project_class/monthly/coordinator.properties \
   -run
 
 # Daily druid loading
@@ -33,7 +33,7 @@
   -Doozie_launcher_queue_name=production \
   -Doozie_launcher_memory=2048 \
   -Dstart_time=2015-12-01T00:00Z \
-  -config project_wide/druid/daily/coordinator.properties \
+  -config per_project_class/druid/daily/coordinator.properties \
   -run
 ```
 
diff --git a/oozie/unique_devices/datasets.xml 
b/oozie/unique_devices/datasets.xml
index 2113ffc..44e0979 100644
--- a/oozie/unique_devices/datasets.xml
+++ b/oozie/unique_devices/datasets.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!--
-Defines reusable datasets for unique devicess data.
+Defines reusable datasets for unique devices data.
 Use this dataset in your coordinator.xml files by setting:
 
     ${start_time}     - the initial instance of your data.
@@ -13,8 +13,8 @@
 <datasets>
 
     <!--
-    The unique_devices_project_wide_[daily,monthly] datasets contain 
project-wide
-    uniques devices per project and country, counted using the last_access 
method.
+    The unique_devices_per_project_class_[daily,monthly] datasets contain 
unique
+    devices per project-class and country, counted using the last_access 
method.
 
     Note that we do not use “${...}” but “${"$"}{...}", as dataset files are
     passed to EL twice in cascade, and in the first EL level, ${MONTH}
@@ -24,19 +24,19 @@
     at “${...}” as input for the second EL level. There, the variables hold
     their expected values, and we can start unpadding them.
     -->
-    <dataset name="unique_devices_project_wide_daily"
+    <dataset name="unique_devices_per_project_class_daily"
              frequency="${coord:days(1)}"
              initial-instance="${start_time}"
              timezone="Universal">
-        
<uri-template>${unique_devices_data_directory}/project_wide/daily/year=${YEAR}/month=${"$"}{MONTH
 + 0}/day=${"$"}{DAY + 0}</uri-template>
+        
<uri-template>${unique_devices_data_directory}/per_project_class/daily/year=${YEAR}/month=${"$"}{MONTH
 + 0}/day=${"$"}{DAY + 0}</uri-template>
         <done-flag>_SUCCESS</done-flag>
     </dataset>
 
-    <dataset name="unique_devices_project_wide_monthly"
+    <dataset name="unique_devices_per_project_class_monthly"
              frequency="${coord:months(1)}"
              initial-instance="${start_time}"
              timezone="Universal">
-        
<uri-template>${unique_devices_data_directory}/project_wide/monthly/year=${YEAR}/month=${"$"}{MONTH
 + 0}</uri-template>
+        
<uri-template>${unique_devices_data_directory}/per_project_class/monthly/year=${YEAR}/month=${"$"}{MONTH
 + 0}</uri-template>
         <done-flag>_SUCCESS</done-flag>
     </dataset>
 
diff --git a/oozie/unique_devices/project_wide/daily/coordinator.properties 
b/oozie/unique_devices/per_project_class/daily/coordinator.properties
similarity index 75%
rename from oozie/unique_devices/project_wide/daily/coordinator.properties
rename to oozie/unique_devices/per_project_class/daily/coordinator.properties
index e703cad..aaf42f2 100644
--- a/oozie/unique_devices/project_wide/daily/coordinator.properties
+++ b/oozie/unique_devices/per_project_class/daily/coordinator.properties
@@ -1,10 +1,10 @@
-# Configures a coordinator to generate daily project-wide unique devices
+# Configures a coordinator to generate daily per-project-class unique devices
 # using last-access method on webrequest table and save a reduced version
 # (project aggregates except wikimedia) in archive folders.
 #
 # Any of the following properties are overidable with -D.
 # Usage:
-#   oozie job -Duser=$USER -Dstart_time=2017-04-01T00:00Z -submit -config 
oozie/unique_devices/project_wide/daily/coordinator.properties
+#   oozie job -Duser=$USER -Dstart_time=2017-04-01T00:00Z -submit -config 
oozie/unique_devices/per_project_class/daily/coordinator.properties
 #
 # NOTE:  The $oozie_directory must be synced to HDFS so that all relevant
 #        .xml files exist there when this job is submitted.
@@ -35,21 +35,21 @@
 webrequest_data_directory         = ${name_node}/wmf/data/wmf/webrequest
 webrequest_datasets_file          = ${oozie_directory}/webrequest/datasets.xml
 
-# unique_devices_project_wide_daily related settings
-unique_devices_project_wide_daily_table       = 
wmf.unique_devices_project_wide_daily
-unique_devices_data_directory                 = 
${name_node}/wmf/data/wmf/unique_devices
-unique_devices_datasets_file                  = 
${oozie_directory}/unique_devices/datasets.xml
-unique_devices_project_wide_archive_directory = 
${archive_directory}/unique_devices/project_wide
+# unique_devices_per_project_class_daily related settings
+unique_devices_per_project_class_daily_table       = 
wmf.unique_devices_per_project_class_daily
+unique_devices_data_directory                      = 
${name_node}/wmf/data/wmf/unique_devices
+unique_devices_datasets_file                       = 
${oozie_directory}/unique_devices/datasets.xml
+unique_devices_per_project_class_archive_directory = 
${archive_directory}/unique_devices/per_project_class
 
 # refinery-hive jar path for RedirectToPageview UDF
 refinery_hive_jar                 = 
${artifacts_directory}/org/wikimedia/analytics/refinery/refinery-hive-0.0.46.jar
 
 user                              = hdfs
-workflow_file                     = 
${oozie_directory}/unique_devices/project_wide/daily/workflow.xml
+workflow_file                     = 
${oozie_directory}/unique_devices/per_project_class/daily/workflow.xml
 
 temporary_directory               = ${name_node}/tmp
 
 # Coordinator app to run.
 oozie.use.system.libpath          = true
 oozie.action.external.stats.write = true
-oozie.coord.application.path      = 
${oozie_directory}/unique_devices/project_wide/daily/coordinator.xml
+oozie.coord.application.path      = 
${oozie_directory}/unique_devices/per_project_class/daily/coordinator.xml
diff --git a/oozie/unique_devices/project_wide/daily/coordinator.xml 
b/oozie/unique_devices/per_project_class/daily/coordinator.xml
similarity index 86%
rename from oozie/unique_devices/project_wide/daily/coordinator.xml
rename to oozie/unique_devices/per_project_class/daily/coordinator.xml
index a871da5..2ae3384 100644
--- a/oozie/unique_devices/project_wide/daily/coordinator.xml
+++ b/oozie/unique_devices/per_project_class/daily/coordinator.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <coordinator-app xmlns="uri:oozie:coordinator:0.4"
-    name="unique_devices-project_wide-daily-coord"
+    name="unique_devices-per_project_class-daily-coord"
     frequency="${coord:days(1)}"
     start="${start_time}"
     end="${stop_time}"
@@ -22,10 +22,10 @@
         <property><name>webrequest_data_directory</name></property>
         <property><name>webrequest_datasets_file</name></property>
 
-        
<property><name>unique_devices_project_wide_daily_table</name></property>
+        
<property><name>unique_devices_per_project_class_daily_table</name></property>
         <property><name>unique_devices_data_directory</name></property>
         <property><name>unique_devices_datasets_file</name></property>
-        
<property><name>unique_devices_project_wide_archive_directory</name></property>
+        
<property><name>unique_devices_per_project_class_archive_directory</name></property>
 
         <property><name>temporary_directory</name></property>
 
@@ -58,7 +58,7 @@
     </input-events>
 
     <output-events>
-        <data-out name="unique_devices_project_wide_daily_output" 
dataset="unique_devices_project_wide_daily">
+        <data-out name="unique_devices_per_project_class_daily_output" 
dataset="unique_devices_per_project_class_daily">
             <instance>${coord:current(0)}</instance>
         </data-out>
     </output-events>
@@ -95,8 +95,8 @@
                   <value>${coord:formatTime(coord:nominalTime(), "dd")}</value>
               </property>
               <property>
-                    
<name>unique_devices_project_wide_daily_current_directory</name>
-                    
<value>${coord:dataOut('unique_devices_project_wide_daily_output')}</value>
+                    
<name>unique_devices_per_project_class_daily_current_directory</name>
+                    
<value>${coord:dataOut('unique_devices_per_project_class_daily_output')}</value>
                 </property>
           </configuration>
       </workflow>
diff --git 
a/oozie/unique_devices/project_wide/daily/unique_devices_project_wide_daily.hql 
b/oozie/unique_devices/per_project_class/daily/unique_devices_per_project_class_daily.hql
similarity index 77%
rename from 
oozie/unique_devices/project_wide/daily/unique_devices_project_wide_daily.hql
rename to 
oozie/unique_devices/per_project_class/daily/unique_devices_per_project_class_daily.hql
index 206b8e5..48642ca 100644
--- 
a/oozie/unique_devices/project_wide/daily/unique_devices_project_wide_daily.hql
+++ 
b/oozie/unique_devices/per_project_class/daily/unique_devices_per_project_class_daily.hql
@@ -1,4 +1,4 @@
--- Generates unique devices project-wide daily based on WMF-Last-Access-global 
cookie
+-- Generates unique devices per-project-class daily based on 
WMF-Last-Access-global cookie
 --
 -- Parameters:
 --     source_table        -- Table containing source data
@@ -9,9 +9,9 @@
 --     refinery_hive_jar   -- The path to refinery-hive jar for UDF
 --
 -- Usage:
---     hive -f unique_devices_project_wide_daily.hql \
+--     hive -f unique_devices_per_project_class_daily.hql \
 --         -d source_table=wmf.webrequest \
---         -d destination_table=wmf.unique_devices_project_wide_daily \
+--         -d destination_table=wmf.unique_devices_per_project_class_daily \
 --         -d year=2017 \
 --         -d month=4 \
 --         -d day=1  \
@@ -30,7 +30,7 @@
         year,
         month,
         day,
-        normalized_host.project_class AS project,
+        normalized_host.project_class AS project_class,
         geocoded_data['country'] AS country,
         geocoded_data['country_code'] AS country_code,
         unix_timestamp(x_analytics_map['WMF-Last-Access-Global'], 
'dd-MMM-yyyy') AS last_access_global,
@@ -53,29 +53,29 @@
 -- (fresh sessions are not counted with last_access method)
 fresh_sessions_aggregated AS (
     SELECT
-        project,
+        project_class,
         country,
         country_code,
         COUNT(1) AS uniques_offset
     FROM (
         SELECT
-            hash(ip, user_agent, accept_language, project) AS id,
-            project,
+            hash(ip, user_agent, accept_language, project_class) AS id,
+            project_class,
             country,
             country_code,
             SUM(CASE WHEN (nocookies IS NOT NULL) THEN 1 ELSE 0 END)
         FROM
             last_access_dates
         GROUP BY
-            hash(ip, user_agent, accept_language, project),
-            project,
+            hash(ip, user_agent, accept_language, project_class),
+            project_class,
             country,
             country_code
         -- Only keeping clients having done 1 event without cookies
         HAVING SUM(CASE WHEN (nocookies IS NOT NULL) THEN 1 ELSE 0 END) = 1
         ) fresh_sessions
     GROUP BY
-        project,
+        project_class,
         country,
         country_code
 )
@@ -83,12 +83,12 @@
     PARTITION(year = ${year}, month = ${month}, day = ${day})
 
 SELECT
-    COALESCE(la.project, fresh.project) as project,
+    COALESCE(la.project_class, fresh.project_class) as project_class,
     COALESCE(la.country, fresh.country) as country,
     COALESCE(la.country_code, fresh.country_code) as country_code,
     SUM(CASE
-        -- project set, last-access-global not set and client accept cookies 
--> first visit, count
-        WHEN (la.project IS NOT NULL AND la.last_access_global IS NULL AND 
la.nocookies is NULL) THEN 1
+        -- project_class set, last-access-global not set and client accept 
cookies --> first visit, count
+        WHEN (la.project_class IS NOT NULL AND la.last_access_global IS NULL 
AND la.nocookies is NULL) THEN 1
         -- last-access-global set and its date is before today --> First visit 
today, count
         WHEN ((la.last_access_global IS NOT NULL)
             AND (la.last_access_global < unix_timestamp(CONCAT('${year}-', 
LPAD('${month}', 2, '0'), '-', LPAD('${day}', 2, '0')), 'yyyy-MM-dd'))) THEN 1
@@ -97,8 +97,8 @@
     END) AS uniques_underestimate,
     COALESCE(fresh.uniques_offset, 0) AS uniques_offset,
     SUM(CASE
-        -- project set, last-access-global not set and client accept cookies 
--> first visit, count
-        WHEN (la.project IS NOT NULL AND la.last_access_global IS NULL AND 
la.nocookies is NULL) THEN 1
+        -- project_class set, last-access-global not set and client accept 
cookies --> first visit, count
+        WHEN (la.project_class IS NOT NULL AND la.last_access_global IS NULL 
AND la.nocookies is NULL) THEN 1
         -- last-access-global set and its date is before today --> First visit 
today, count
         WHEN ((la.last_access_global IS NOT NULL)
             AND (la.last_access_global < unix_timestamp(CONCAT('${year}-', 
LPAD('${month}', 2, '0'), '-', LPAD('${day}', 2, '0')), 'yyyy-MM-dd'))) THEN 1
@@ -109,10 +109,10 @@
     last_access_dates AS la
     -- Outer join to keep every row from both table
     FULL OUTER JOIN fresh_sessions_aggregated AS fresh
-        ON (la.project = fresh.project
+        ON (la.project_class = fresh.project_class
           AND la.country_code = fresh.country_code)
 GROUP BY
-    COALESCE(la.project, fresh.project),
+    COALESCE(la.project_class, fresh.project_class),
     COALESCE(la.country, fresh.country),
     COALESCE(la.country_code, fresh.country_code),
     COALESCE(fresh.uniques_offset, 0)
diff --git 
a/oozie/unique_devices/project_wide/daily/unique_devices_project_wide_daily_to_archive.hql
 
b/oozie/unique_devices/per_project_class/daily/unique_devices_per_project_class_daily_to_archive.hql
similarity index 80%
rename from 
oozie/unique_devices/project_wide/daily/unique_devices_project_wide_daily_to_archive.hql
rename to 
oozie/unique_devices/per_project_class/daily/unique_devices_per_project_class_daily_to_archive.hql
index 07eac66..7a38894 100644
--- 
a/oozie/unique_devices/project_wide/daily/unique_devices_project_wide_daily_to_archive.hql
+++ 
b/oozie/unique_devices/per_project_class/daily/unique_devices_per_project_class_daily_to_archive.hql
@@ -1,4 +1,4 @@
--- Aggregate unique devices project-wide daily by project and remove wikimedia.
+-- Aggregate unique devices per-project-class daily by project_class and 
remove wikimedia.
 --
 -- Parameters:
 --     source_table           -- Table containing source data
@@ -8,9 +8,9 @@
 --     day                    -- day of the to-be-generated
 --
 -- Usage:
---     hive -f unique_devices_project_wide_daily_to_archive.hql \
---         -d source_table=wmf.unique_devices_project_wide_daily \
---         -d 
destination_directory=/tmp/archive/unique_devices/project_wide_daily \
+--     hive -f unique_devices_per_project_class_daily_to_archive.hql \
+--         -d source_table=wmf.unique_devices_per_project_class_daily \
+--         -d 
destination_directory=/tmp/archive/unique_devices/per_project_class \
 --         -d year=2017 \
 --         -d month=4 \
 --         -d day=1
@@ -28,13 +28,13 @@
     -- Set 0 as volume column since we don't use it.
     SELECT
         CONCAT_WS('\t',
-          project,
+          project_class,
           cast(uniques_underestimate AS string),
           cast(uniques_offset AS string),
           cast(uniques_estimate AS string)) AS line
     FROM (
         SELECT
-            project,
+            project_class,
             SUM(uniques_underestimate) AS uniques_underestimate,
             SUM(uniques_offset) AS uniques_offset,
             SUM(uniques_estimate) AS uniques_estimate
@@ -42,9 +42,9 @@
         WHERE year=${year}
             AND month=${month}
             AND day=${day}
-            AND project != 'wikimedia'
+            AND project_class != 'wikimedia'
         GROUP BY
-            project
+            project_class
         ORDER BY
             uniques_estimate DESC
         LIMIT 100000000
diff --git a/oozie/unique_devices/project_wide/daily/workflow.xml 
b/oozie/unique_devices/per_project_class/daily/workflow.xml
similarity index 89%
rename from oozie/unique_devices/project_wide/daily/workflow.xml
rename to oozie/unique_devices/per_project_class/daily/workflow.xml
index c75f3dc..d3f109e 100644
--- a/oozie/unique_devices/project_wide/daily/workflow.xml
+++ b/oozie/unique_devices/per_project_class/daily/workflow.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <workflow-app xmlns="uri:oozie:workflow:0.4"
-    name="unique_devices-project_wide-daily-wf-${year}-${month}-${day}">
+    name="unique_devices-per_project_class-daily-wf-${year}-${month}-${day}">
     <parameters>
 
         <!-- Default values for oozie settings -->
@@ -28,7 +28,7 @@
             <description>Hive table to read data from.</description>
         </property>
         <property>
-            <name>unique_devices_project_wide_daily_table</name>
+            <name>unique_devices_per_project_class_daily_table</name>
             <description>The table to store project-wide daily unique devices 
data in.</description>
         </property>
         <property>
@@ -60,11 +60,11 @@
             <description>Path to refinery-hive jar containing 
UDFs</description>
         </property>
         <property>
-            <name>unique_devices_project_wide_daily_current_directory</name>
+            
<name>unique_devices_per_project_class_daily_current_directory</name>
             <description>Directory where the currently computed data is 
stored</description>
         </property>
         <property>
-            <name>unique_devices_project_wide_archive_directory</name>
+            <name>unique_devices_per_project_class_archive_directory</name>
             <description>Directory where the archived data is 
stored</description>
         </property>
         <property>
@@ -81,9 +81,9 @@
         </property>
     </parameters>
 
-    <start to="compute_unique_devices_project_wide_daily"/>
+    <start to="compute_unique_devices_per_project_class_daily"/>
 
-    <action name="compute_unique_devices_project_wide_daily">
+    <action name="compute_unique_devices_per_project_class_daily">
         <hive xmlns="uri:oozie:hive-action:0.2">
             <job-tracker>${job_tracker}</job-tracker>
             <name-node>${name_node}</name-node>
@@ -112,9 +112,9 @@
                     <value>/tmp/hive-${user}</value>
                 </property>
             </configuration>
-            <script>unique_devices_project_wide_daily.hql</script>
+            <script>unique_devices_per_project_class_daily.hql</script>
             <param>source_table=${webrequest_table}</param>
-            
<param>destination_table=${unique_devices_project_wide_daily_table}</param>
+            
<param>destination_table=${unique_devices_per_project_class_daily_table}</param>
             <param>year=${year}</param>
             <param>month=${month}</param>
             <param>day=${day}</param>
@@ -130,7 +130,7 @@
             <configuration>
                 <property>
                     <name>directory</name>
-                    
<value>${unique_devices_project_wide_daily_current_directory}</value>
+                    
<value>${unique_devices_per_project_class_daily_current_directory}</value>
                 </property>
             </configuration>
         </sub-workflow>
@@ -168,10 +168,10 @@
                 </property>
             </configuration>
 
-            <script>unique_devices_project_wide_daily_to_archive.hql</script>
+            
<script>unique_devices_per_project_class_daily_to_archive.hql</script>
             <!-- Here, the source for archive is the
                  destination of the previous job -->
-            
<param>source_table=${unique_devices_project_wide_daily_table}</param>
+            
<param>source_table=${unique_devices_per_project_class_daily_table}</param>
             <param>year=${year}</param>
             <param>month=${month}</param>
             <param>day=${day}</param>
@@ -211,7 +211,7 @@
                 </property>
                 <property>
                     <name>archive_file</name>
-                    
<value>${unique_devices_project_wide_archive_directory}/${padded_year}/${padded_year}-${padded_month}/unique_devices_project_wide_daily-${padded_year}-${padded_month}-${padded_day}.gz</value>
+                    
<value>${unique_devices_per_project_class_archive_directory}/${padded_year}/${padded_year}-${padded_month}/unique_devices_per_project_class_daily-${padded_year}-${padded_month}-${padded_day}.gz</value>
                 </property>
             </configuration>
         </sub-workflow>
diff --git 
a/oozie/unique_devices/project_wide/druid/daily/coordinator.properties 
b/oozie/unique_devices/per_project_class/druid/daily/coordinator.properties
similarity index 81%
rename from oozie/unique_devices/project_wide/druid/daily/coordinator.properties
rename to 
oozie/unique_devices/per_project_class/druid/daily/coordinator.properties
index 3abd6ac..ee0699a 100644
--- a/oozie/unique_devices/project_wide/druid/daily/coordinator.properties
+++ b/oozie/unique_devices/per_project_class/druid/daily/coordinator.properties
@@ -1,9 +1,9 @@
-# Configures a coordinator to generate a day of JSON daily project-wide
+# Configures a coordinator to generate a day of JSON daily per-project-class
 # unique devices and load them in druid.
 #
 # Any of the following properties are override-able with -D.
 # Usage:
-#   oozie job -Dstart_time=2017-04-01T00:00Z -submit -config 
oozie/uniques_devices/project_wide/druid/daily/coordinator.properties
+#   oozie job -Dstart_time=2017-04-01T00:00Z -submit -config 
oozie/unique_devices/per_project_class/druid/daily/coordinator.properties
 #
 # NOTE:  The $refinery_directory must be synced to HDFS so that all relevant
 #        .xml files exist there when this job is submitted.
@@ -28,16 +28,16 @@
 hive_site_xml                       = ${name_node}/user/hive/hive-site.xml
 
 # HDFS path to coordinator to run.
-coordinator_file                    = 
${oozie_directory}/unique_devices/project_wide/druid/daily/coordinator.xml
+coordinator_file                    = 
${oozie_directory}/unique_devices/per_project_class/druid/daily/coordinator.xml
 # HDFS path to workflow to run.
-workflow_file                       = 
${oozie_directory}/unique_devices/project_wide/druid/daily/workflow.xml
+workflow_file                       = 
${oozie_directory}/unique_devices/per_project_class/druid/daily/workflow.xml
 
-# unique devices project-wide settings
+# unique devices per-project-class settings
 unique_devices_datasets_file        = 
${oozie_directory}/unique_devices/datasets.xml
 unique_devices_data_directory       = ${name_node}/wmf/data/wmf/unique_devices
-unique_devices_project_wide_daily_table  = 
wmf.unique_devices_project_wide_daily
+unique_devices_per_project_class_daily_table  = 
wmf.unique_devices_per_project_class_daily
 
-# Initial import time of the unique_devices_project_wide_daily dataset.
+# Initial import time of the unique_devices_per_project_class_daily dataset.
 start_time                          = 2017-04-01T00:00Z
 
 # Time to stop running this coordinator.  Year 3000 == never!
@@ -47,7 +47,7 @@
 temporary_directory                 = ${name_node}/tmp
 
 # HDFS path to template to use.
-druid_template_file                 = 
${oozie_directory}/unique_devices/project_wide/druid/daily/load_unique_devices_project_wide_daily.json.template
+druid_template_file                 = 
${oozie_directory}/unique_devices/per_project_class/druid/daily/load_unique_devices_per_project_class_daily.json.template
 # Druid overlord url
 druid_overlord_url                  = http://druid1001.eqiad.wmnet:8090
 
diff --git a/oozie/unique_devices/project_wide/druid/daily/coordinator.xml 
b/oozie/unique_devices/per_project_class/druid/daily/coordinator.xml
similarity index 90%
rename from oozie/unique_devices/project_wide/druid/daily/coordinator.xml
rename to oozie/unique_devices/per_project_class/druid/daily/coordinator.xml
index c135d97..136d6a9 100644
--- a/oozie/unique_devices/project_wide/druid/daily/coordinator.xml
+++ b/oozie/unique_devices/per_project_class/druid/daily/coordinator.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <coordinator-app xmlns="uri:oozie:coordinator:0.4"
-    name="unique_devices-project_wide-druid-daily-coord"
+    name="unique_devices-per_project_class-druid-daily-coord"
     frequency="${coord:days(1)}"
     start="${start_time}"
     end="${stop_time}"
@@ -17,7 +17,7 @@
         <property><name>workflow_file</name></property>
         <property><name>unique_devices_datasets_file</name></property>
         <property><name>unique_devices_data_directory</name></property>
-        
<property><name>unique_devices_project_wide_daily_table</name></property>
+        
<property><name>unique_devices_per_project_class_daily_table</name></property>
 
         <property><name>start_time</name></property>
         <property><name>stop_time</name></property>
@@ -49,7 +49,7 @@
     </datasets>
 
     <input-events>
-        <data-in name="unique_devices_project_wide_daily_input" 
dataset="unique_devices_project_wide_daily">
+        <data-in name="unique_devices_per_project_class_daily_input" 
dataset="unique_devices_per_project_class_daily">
             <!-- Dataset uses day as frequency -->
             <instance>${coord:current(0)}</instance>
         </data-in>
diff --git 
a/oozie/unique_devices/per_project_class/druid/daily/generate_druid_unique_devices_per_project_class_daily.hql
 
b/oozie/unique_devices/per_project_class/druid/daily/generate_druid_unique_devices_per_project_class_daily.hql
new file mode 100644
index 0000000..bebff9a
--- /dev/null
+++ 
b/oozie/unique_devices/per_project_class/druid/daily/generate_druid_unique_devices_per_project_class_daily.hql
@@ -0,0 +1,55 @@
+-- Extracts one day of json formatted daily unique devices
+-- per-project-class to be loaded in Druid
+--
+-- Usage:
+--     hive -f generate_druid_unique_devices_per_project_class_daily.hql \
+--         -d source_table=wmf.unique_devices_per_project_class_daily \
+--         -d 
destination_directory=/tmp/druid/unique_devices_per_project_class_daily_json \
+--         -d year=2017 \
+--         -d month=4 \
+--         -d day=1
+--
+
+SET hive.exec.compress.output=true;
+SET 
mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.GzipCodec;
+
+
+ADD JAR /usr/lib/hive-hcatalog/share/hcatalog/hive-hcatalog-core.jar;
+
+
+DROP TABLE IF EXISTS 
tmp_druid_unique_devices_per_project_class_daily_${year}_${month}_${day};
+
+
+CREATE EXTERNAL TABLE IF NOT EXISTS 
tmp_druid_unique_devices_per_project_class_daily_${year}_${month}_${day} (
+    `dt`                     string,
+    `project_class`          string,
+    `country`                string,
+    `country_code`           string,
+    `uniques_underestimate`  bigint,
+    `uniques_offset`         bigint,
+    `uniques_estimate`       bigint
+)
+ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
+STORED AS TEXTFILE
+LOCATION '${destination_directory}';
+
+
+INSERT OVERWRITE TABLE 
tmp_druid_unique_devices_per_project_class_daily_${year}_${month}_${day}
+SELECT
+    CONCAT(
+        LPAD(year, 4, '0'), '-',
+        LPAD(month, 2, '0'), '-',
+        LPAD(day, 2, '0'), 'T00:00:00Z') AS dt,
+    project_class AS project_class,
+    country AS country,
+    country_code AS country_code,
+    uniques_underestimate AS uniques_underestimate,
+    uniques_offset AS uniques_offset,
+    uniques_estimate AS uniques_estimate
+FROM ${source_table}
+WHERE year = ${year}
+    AND month = ${month}
+    AND day = ${day};
+
+
+DROP TABLE IF EXISTS 
tmp_druid_unique_devices_per_project_class_daily_${year}_${month}_${day};
diff --git 
a/oozie/unique_devices/project_wide/druid/daily/load_unique_devices_project_wide_daily.json.template
 
b/oozie/unique_devices/per_project_class/druid/daily/load_unique_devices_per_project_class_daily.json.template
similarity index 93%
rename from 
oozie/unique_devices/project_wide/druid/daily/load_unique_devices_project_wide_daily.json.template
rename to 
oozie/unique_devices/per_project_class/druid/daily/load_unique_devices_per_project_class_daily.json.template
index 2ed1ed0..3b333e4 100644
--- 
a/oozie/unique_devices/project_wide/druid/daily/load_unique_devices_project_wide_daily.json.template
+++ 
b/oozie/unique_devices/per_project_class/druid/daily/load_unique_devices_per_project_class_daily.json.template
@@ -9,7 +9,7 @@
       }
     },
     "dataSchema" : {
-      "dataSource" : "unique-devices-project-wide-daily",
+      "dataSource" : "unique-devices-per_project_class-daily",
       "granularitySpec" : {
         "type" : "uniform",
         "segmentGranularity" : "day",
@@ -22,7 +22,7 @@
           "format" : "json",
           "dimensionsSpec" : {
             "dimensions" : [
-                "project",
+                "project_class",
                 "country",
                 "country_code"
             ]
diff --git a/oozie/unique_devices/project_wide/druid/daily/workflow.xml 
b/oozie/unique_devices/per_project_class/druid/daily/workflow.xml
similarity index 87%
rename from oozie/unique_devices/project_wide/druid/daily/workflow.xml
rename to oozie/unique_devices/per_project_class/druid/daily/workflow.xml
index 9c427f8..8893212 100644
--- a/oozie/unique_devices/project_wide/druid/daily/workflow.xml
+++ b/oozie/unique_devices/per_project_class/druid/daily/workflow.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <workflow-app xmlns="uri:oozie:workflow:0.4"
-    name="unique_devices-project_wide-druid-daily-wf-${year}-${month}-${day}">
+    
name="unique_devices-per_project_class-druid-daily-wf-${year}-${month}-${day}">
 
     <parameters>
 
@@ -25,7 +25,7 @@
         </property>
 
         <property>
-            <name>unique_devices_project_wide_daily_table</name>
+            <name>unique_devices_per_project_class_daily_table</name>
             <description>The hive unique devices daily table to 
use</description>
         </property>
 
@@ -73,9 +73,9 @@
 
     </parameters>
 
-    <start to="generate_json_unique_devices_project_wide_daily"/>
+    <start to="generate_json_unique_devices_per_project_class_daily"/>
 
-    <action name="generate_json_unique_devices_project_wide_daily">
+    <action name="generate_json_unique_devices_per_project_class_daily">
         <hive xmlns="uri:oozie:hive-action:0.2">
             <job-tracker>${job_tracker}</job-tracker>
             <name-node>${name_node}</name-node>
@@ -105,24 +105,24 @@
                     <value>/tmp/hive-${user}</value>
                 </property>
             </configuration>
-            
<script>generate_druid_unique_devices_project_wide_daily.hql</script>
-            
<param>source_table=${unique_devices_project_wide_daily_table}</param>
-            
<param>destination_directory=${temporary_directory}/${wf:id()}-druid-unique_devices-project_wide-daily-${year}-${month}-${day}</param>
+            
<script>generate_druid_unique_devices_per_project_class_daily.hql</script>
+            
<param>source_table=${unique_devices_per_project_class_daily_table}</param>
+            
<param>destination_directory=${temporary_directory}/${wf:id()}-druid-unique_devices-per_project_class-daily-${year}-${month}-${day}</param>
             <param>year=${year}</param>
             <param>month=${month}</param>
             <param>day=${day}</param>
         </hive>
-        <ok to="mark_json_unique_devices_project_wide_daily_dataset_done" />
+        <ok to="mark_json_unique_devices_per_project_class_daily_dataset_done" 
/>
         <error to="send_error_email" />
     </action>
 
-    <action name="mark_json_unique_devices_project_wide_daily_dataset_done">
+    <action 
name="mark_json_unique_devices_per_project_class_daily_dataset_done">
         <sub-workflow>
             <app-path>${mark_directory_done_workflow_file}</app-path>
             <configuration>
                 <property>
                     <name>directory</name>
-                    
<value>${temporary_directory}/${wf:id()}-druid-unique_devices-project_wide-daily-${year}-${month}-${day}</value>
+                    
<value>${temporary_directory}/${wf:id()}-druid-unique_devices-per_project_class-daily-${year}-${month}-${day}</value>
                 </property>
             </configuration>
         </sub-workflow>
@@ -138,7 +138,7 @@
             <configuration>
                 <property>
                     <name>source_directory</name>
-                    
<value>${temporary_directory}/${wf:id()}-druid-unique_devices-project_wide-daily-${year}-${month}-${day}</value>
+                    
<value>${temporary_directory}/${wf:id()}-druid-unique_devices-per_project_class-daily-${year}-${month}-${day}</value>
                 </property>
                 <property>
                     <name>template_file</name>
@@ -160,7 +160,7 @@
 
     <action name="remove_temporary_data">
         <fs>
-            <delete 
path="${temporary_directory}/${wf:id()}-druid-unique_devices-project_wide-daily-${year}-${month}-${day}"/>
+            <delete 
path="${temporary_directory}/${wf:id()}-druid-unique_devices-per_project_class-daily-${year}-${month}-${day}"/>
         </fs>
         <ok to="end"/>
         <error to="send_error_email"/>
diff --git a/oozie/unique_devices/project_wide/monthly/coordinator.properties 
b/oozie/unique_devices/per_project_class/monthly/coordinator.properties
similarity index 78%
rename from oozie/unique_devices/project_wide/monthly/coordinator.properties
rename to oozie/unique_devices/per_project_class/monthly/coordinator.properties
index 329f3d0..4968a19 100644
--- a/oozie/unique_devices/project_wide/monthly/coordinator.properties
+++ b/oozie/unique_devices/per_project_class/monthly/coordinator.properties
@@ -1,10 +1,10 @@
-# Configures a coordinator to generate monthly project-wide unique devices
+# Configures a coordinator to generate monthly per-project-class unique devices
 # using last_access method on webrequest table and save a reduced version
-# (project aggregates without wikimedia) in archive folders.
+# (project-class aggregates without wikimedia) in archive folders.
 #
 # Any of the following properties are overridable with -D.
 # Usage:
-#   oozie job -Duser=$USER -Dstart_time=2017-04-01T00:00Z -submit -config 
oozie/unique_devices/project_wide/monthly/coordinator.properties
+#   oozie job -Duser=$USER -Dstart_time=2017-04-01T00:00Z -submit -config 
oozie/unique_devices/per_project_class/monthly/coordinator.properties
 #
 # NOTE:  The $oozie_directory must be synced to HDFS so that all relevant
 #        .xml files exist there when this job is submitted.
@@ -32,11 +32,11 @@
 webrequest_data_directory         = ${name_node}/wmf/data/wmf/webrequest
 webrequest_datasets_file          = ${oozie_directory}/webrequest/datasets.xml
 
-# uniques devices project_wide monthly settings
-unique_devices_project_wide_monthly_table     = 
wmf.unique_devices_project_wide_monthly
+# unique devices per_project_class monthly settings
+unique_devices_per_project_class_monthly_table     = 
wmf.unique_devices_per_project_class_monthly
 unique_devices_data_directory                 = 
${name_node}/wmf/data/wmf/unique_devices
 unique_devices_datasets_file                  = 
${oozie_directory}/unique_devices/datasets.xml
-unique_devices_project_wide_archive_directory = 
${archive_directory}/unique_devices/project_wide
+unique_devices_per_project_class_archive_directory = 
${archive_directory}/unique_devices/per_project_class
 
 # refinery-hive jar path for RedirectToPageview UDF
 refinery_hive_jar                 = 
${artifacts_directory}/org/wikimedia/analytics/refinery/refinery-hive-0.0.46.jar
@@ -44,9 +44,9 @@
 temporary_directory               = ${name_node}/tmp
 
 user                              = hdfs
-workflow_file                     = 
${oozie_directory}/unique_devices/project_wide/monthly/workflow.xml
+workflow_file                     = 
${oozie_directory}/unique_devices/per_project_class/monthly/workflow.xml
 
 # Coordinator app to run.
 oozie.use.system.libpath          = true
 oozie.action.external.stats.write = true
-oozie.coord.application.path      = 
${oozie_directory}/unique_devices/project_wide/monthly/coordinator.xml
+oozie.coord.application.path      = 
${oozie_directory}/unique_devices/per_project_class/monthly/coordinator.xml
diff --git a/oozie/unique_devices/project_wide/monthly/coordinator.xml 
b/oozie/unique_devices/per_project_class/monthly/coordinator.xml
similarity index 84%
rename from oozie/unique_devices/project_wide/monthly/coordinator.xml
rename to oozie/unique_devices/per_project_class/monthly/coordinator.xml
index 694a6ac..217086e 100644
--- a/oozie/unique_devices/project_wide/monthly/coordinator.xml
+++ b/oozie/unique_devices/per_project_class/monthly/coordinator.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <coordinator-app xmlns="uri:oozie:coordinator:0.4"
-    name="unique_devices-project_wide-monthly-coord"
+    name="unique_devices-per_project_class-monthly-coord"
     frequency="${coord:months(1)}"
     start="${start_time}"
     end="${stop_time}"
@@ -22,10 +22,10 @@
         <property><name>webrequest_data_directory</name></property>
         <property><name>webrequest_datasets_file</name></property>
 
-        
<property><name>unique_devices_project_wide_monthly_table</name></property>
+        
<property><name>unique_devices_per_project_class_monthly_table</name></property>
         <property><name>unique_devices_data_directory</name></property>
         <property><name>unique_devices_datasets_file</name></property>
-        
<property><name>unique_devices_project_wide_archive_directory</name></property>
+        
<property><name>unique_devices_per_project_class_archive_directory</name></property>
 
         <property><name>temporary_directory</name></property>
 
@@ -58,7 +58,7 @@
     </input-events>
 
     <output-events>
-        <data-out name="unique_devices_project_wide_monthly_output" 
dataset="unique_devices_project_wide_monthly">
+        <data-out name="unique_devices_per_project_class_monthly_output" 
dataset="unique_devices_per_project_class_monthly">
             <instance>${coord:current(0)}</instance>
         </data-out>
     </output-events>
@@ -86,8 +86,8 @@
                   <value>${coord:formatTime(coord:nominalTime(), "MM")}</value>
               </property>
               <property>
-                    
<name>unique_devices_project_wide_monthly_current_directory</name>
-                    
<value>${coord:dataOut('unique_devices_project_wide_monthly_output')}</value>
+                    
<name>unique_devices_per_project_class_monthly_current_directory</name>
+                    
<value>${coord:dataOut('unique_devices_per_project_class_monthly_output')}</value>
                 </property>
           </configuration>
       </workflow>
diff --git 
a/oozie/unique_devices/project_wide/monthly/unique_devices_project_wide_monthly.hql
 
b/oozie/unique_devices/per_project_class/monthly/unique_devices_per_project_class_monthly.hql
similarity index 76%
rename from 
oozie/unique_devices/project_wide/monthly/unique_devices_project_wide_monthly.hql
rename to 
oozie/unique_devices/per_project_class/monthly/unique_devices_per_project_class_monthly.hql
index 5ddf6cb..1ba32d8 100644
--- 
a/oozie/unique_devices/project_wide/monthly/unique_devices_project_wide_monthly.hql
+++ 
b/oozie/unique_devices/per_project_class/monthly/unique_devices_per_project_class_monthly.hql
@@ -1,4 +1,4 @@
--- Generates unique devices project-wide monthly based on 
WMF-Last-Access-global cookie
+-- Generates unique devices per-project-class monthly based on 
WMF-Last-Access-global cookie
 --
 -- Parameters:
 --     source_table        -- Table containing source data
@@ -8,9 +8,9 @@
 --     refinery_hive_jar   -- The path to refinery-hive jar for UDF
 --
 -- Usage
---     hive -f unique_devices_project_wide_monthly.hql \
+--     hive -f unique_devices_per_project_class_monthly.hql \
 --         -d source_table=wmf.webrequest \
---         -d destination_table=wmf.unique_devices_project_wide_monthly \
+--         -d destination_table=wmf.unique_devices_per_project_class_monthly \
 --         -d year=2017 \
 --         -d month=4  \
 --         -d 
refinery_hive_jar=/wmf/refinery/current/artifacts/refinery-hive.jar
@@ -29,7 +29,7 @@
     SELECT
         year,
         month,
-        normalized_host.project_class AS project,
+        normalized_host.project_class AS project_class,
         geocoded_data['country'] AS country,
         geocoded_data['country_code'] AS country_code,
         unix_timestamp(x_analytics_map['WMF-Last-Access-Global'], 
'dd-MMM-yyyy') AS last_access_global,
@@ -51,29 +51,29 @@
 -- (fresh sessions are not counted with last_access method)
 fresh_sessions_aggregated AS (
     SELECT
-        project,
+        project_class,
         country,
         country_code,
         COUNT(1) AS uniques_offset
     FROM (
         SELECT
-            hash(ip, user_agent, accept_language, project) AS id,
-            project,
+            hash(ip, user_agent, accept_language, project_class) AS id,
+            project_class,
             country,
             country_code,
             SUM(CASE WHEN (nocookies IS NOT NULL) THEN 1 ELSE 0 END)
         FROM
             last_access_dates
         GROUP BY
-            hash(ip, user_agent, accept_language, project),
-            project,
+            hash(ip, user_agent, accept_language, project_class),
+            project_class,
             country,
             country_code
         -- Only keeping clients having done 1 event without cookies
         HAVING SUM(CASE WHEN (nocookies IS NOT NULL) THEN 1 ELSE 0 END) = 1
         ) fresh_sessions
     GROUP BY
-        project,
+        project_class,
         country,
         country_code
 )
@@ -82,12 +82,12 @@
     PARTITION(year = ${year}, month = ${month})
 
 SELECT
-    COALESCE(la.project, fresh.project) as project,
+    COALESCE(la.project_class, fresh.project_class) as project_class,
     COALESCE(la.country, fresh.country) as country,
     COALESCE(la.country_code, fresh.country_code) as country_code,
     SUM(CASE
-        -- project set, last-access-global not set and client accept cookies 
--> first visit, count
-        WHEN (la.project IS NOT NULL AND la.last_access_global IS NULL AND 
la.nocookies is NULL) THEN 1
+        -- project_class set, last-access-global not set and client accept 
cookies --> first visit, count
+        WHEN (la.project_class IS NOT NULL AND la.last_access_global IS NULL 
AND la.nocookies is NULL) THEN 1
         -- last-access-global set and its date is before today --> First visit 
today, count
         WHEN ((la.last_access_global IS NOT NULL)
             AND (la.last_access_global < unix_timestamp(CONCAT('${year}-', 
LPAD('${month}', 2, '0'), '-','01'), 'yyyy-MM-dd'))) THEN 1
@@ -96,8 +96,8 @@
     END) AS uniques_underestimate,
     COALESCE(fresh.uniques_offset, 0) AS uniques_offset,
     SUM(CASE
-        -- project set, last-access-global not set and client accept cookies 
--> first visit, count
-        WHEN (la.project IS NOT NULL AND la.last_access_global IS NULL AND 
la.nocookies is NULL) THEN 1
+        -- project_class set, last-access-global not set and client accept 
cookies --> first visit, count
+        WHEN (la.project_class IS NOT NULL AND la.last_access_global IS NULL 
AND la.nocookies is NULL) THEN 1
         -- last-access-global set and its date is before today --> First visit 
today, count
         WHEN ((la.last_access_global IS NOT NULL)
             AND (la.last_access_global < unix_timestamp(CONCAT('${year}-', 
LPAD('${month}', 2, '0'), '-','01'), 'yyyy-MM-dd'))) THEN 1
@@ -108,9 +108,9 @@
     last_access_dates AS la
      -- Outer join to keep every row from both table
     FULL OUTER JOIN fresh_sessions_aggregated AS fresh
-        ON (la.project = fresh.project AND la.country_code = 
fresh.country_code)
+        ON (la.project_class = fresh.project_class AND la.country_code = 
fresh.country_code)
 GROUP BY
-    COALESCE(la.project, fresh.project),
+    COALESCE(la.project_class, fresh.project_class),
     COALESCE(la.country, fresh.country),
     COALESCE(la.country_code, fresh.country_code),
     COALESCE(fresh.uniques_offset, 0)
diff --git 
a/oozie/unique_devices/project_wide/monthly/unique_devices_project_wide_monthly_to_archive.hql
 
b/oozie/unique_devices/per_project_class/monthly/unique_devices_per_project_class_monthly_to_archive.hql
similarity index 79%
rename from 
oozie/unique_devices/project_wide/monthly/unique_devices_project_wide_monthly_to_archive.hql
rename to 
oozie/unique_devices/per_project_class/monthly/unique_devices_per_project_class_monthly_to_archive.hql
index b6069b1..8b512b2 100644
--- 
a/oozie/unique_devices/project_wide/monthly/unique_devices_project_wide_monthly_to_archive.hql
+++ 
b/oozie/unique_devices/per_project_class/monthly/unique_devices_per_project_class_monthly_to_archive.hql
@@ -1,4 +1,4 @@
--- Aggregate unique devices project-wide monthly by project and remove 
wikimedia.
+-- Aggregate unique devices per-project-class monthly by project-class and 
remove wikimedia.
 --
 -- Parameters:
 --     source_table           -- Table containing source data
@@ -7,9 +7,9 @@
 --     month                  -- month of the to-be-generated
 --
 -- Usage:
---     hive -f unique_devices_project_wide_monthly_to_archive.hql \
---         -d source_table=wmf.unique_devices_project_wide_monthly \
---         -d destination_directory=/tmp/archive/unique_devices/project_wide \
+--     hive -f unique_devices_per_project_class_monthly_to_archive.hql \
+--         -d source_table=wmf.unique_devices_per_project_class_monthly \
+--         -d 
destination_directory=/tmp/archive/unique_devices/per_project_class \
 --         -d year=2017 \
 --         -d month=4
 
@@ -26,22 +26,22 @@
     -- Set 0 as volume column since we don't use it.
     SELECT
         CONCAT_WS('\t',
-          project,
+          project_class,
           cast(uniques_underestimate AS string),
           cast(uniques_offset AS string),
           cast(uniques_estimate AS string)) AS line
     FROM (
         SELECT
-            project,
+            project_class,
             SUM(uniques_underestimate) AS uniques_underestimate,
             SUM(uniques_offset) AS uniques_offset,
             SUM(uniques_estimate) AS uniques_estimate
         FROM ${source_table}
         WHERE year=${year}
             AND month=${month}
-            AND project != 'wikimedia'
+            AND project_class != 'wikimedia'
         GROUP BY
-            project
+            project_class
         ORDER BY
             uniques_estimate DESC
         LIMIT 100000000
diff --git a/oozie/unique_devices/project_wide/monthly/workflow.xml 
b/oozie/unique_devices/per_project_class/monthly/workflow.xml
similarity index 88%
rename from oozie/unique_devices/project_wide/monthly/workflow.xml
rename to oozie/unique_devices/per_project_class/monthly/workflow.xml
index c4cff08..4b11bcd 100644
--- a/oozie/unique_devices/project_wide/monthly/workflow.xml
+++ b/oozie/unique_devices/per_project_class/monthly/workflow.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <workflow-app xmlns="uri:oozie:workflow:0.4"
-    name="unique_devices-project_wide-monthly-wf-${year}-${month}">
+    name="unique_devices-per_project_class-monthly-wf-${year}-${month}">
     <parameters>
 
         <!-- Default values for oozie settings -->
@@ -28,7 +28,7 @@
             <description>Hive table to read data from.</description>
         </property>
         <property>
-            <name>unique_devices_project_wide_monthly_table</name>
+            <name>unique_devices_per_project_class_monthly_table</name>
             <description>The table to store per-project-class monthly unique 
devices data in.</description>
         </property>
         <property>
@@ -52,11 +52,11 @@
             <description>Path to refinery-hive jar containing 
UDFs</description>
         </property>
         <property>
-            <name>unique_devices_project_wide_monthly_current_directory</name>
+            
<name>unique_devices_per_project_class_monthly_current_directory</name>
             <description>Directory where the currently computed data is 
stored</description>
         </property>
         <property>
-            <name>unique_devices_project_wide_archive_directory</name>
+            <name>unique_devices_per_project_class_archive_directory</name>
             <description>Directory where the archived data is 
stored</description>
         </property>
 
@@ -74,9 +74,9 @@
         </property>
     </parameters>
 
-    <start to="compute_unique_devices_project_wide_monthly"/>
+    <start to="compute_unique_devices_per_project_class_monthly"/>
 
-    <action name="compute_unique_devices_project_wide_monthly">
+    <action name="compute_unique_devices_per_project_class_monthly">
         <hive xmlns="uri:oozie:hive-action:0.2">
             <job-tracker>${job_tracker}</job-tracker>
             <name-node>${name_node}</name-node>
@@ -105,9 +105,9 @@
                     <value>/tmp/hive-${user}</value>
                 </property>
             </configuration>
-            <script>unique_devices_project_wide_monthly.hql</script>
+            <script>unique_devices_per_project_class_monthly.hql</script>
             <param>source_table=${webrequest_table}</param>
-            
<param>destination_table=${unique_devices_project_wide_monthly_table}</param>
+            
<param>destination_table=${unique_devices_per_project_class_monthly_table}</param>
             <param>year=${year}</param>
             <param>month=${month}</param>
             <param>refinery_hive_jar=${refinery_hive_jar}</param>
@@ -122,7 +122,7 @@
             <configuration>
                 <property>
                     <name>directory</name>
-                    
<value>${unique_devices_project_wide_monthly_current_directory}</value>
+                    
<value>${unique_devices_per_project_class_monthly_current_directory}</value>
                 </property>
             </configuration>
         </sub-workflow>
@@ -161,10 +161,10 @@
                 </property>
             </configuration>
 
-            <script>unique_devices_project_wide_monthly_to_archive.hql</script>
+            
<script>unique_devices_per_project_class_monthly_to_archive.hql</script>
             <!-- Here, the source for archive is the
                  destination of the previous job -->
-            
<param>source_table=${unique_devices_project_wide_monthly_table}</param>
+            
<param>source_table=${unique_devices_per_project_class_monthly_table}</param>
             <param>year=${year}</param>
             <param>month=${month}</param>
             
<param>destination_directory=${temporary_directory}/${wf:id()}</param>
@@ -203,7 +203,7 @@
                 </property>
                 <property>
                     <name>archive_file</name>
-                    
<value>${unique_devices_project_wide_archive_directory}/${padded_year}/${padded_year}-${padded_month}/unique_devices_project_wide_monthly-${padded_year}-${padded_month}.gz</value>
+                    
<value>${unique_devices_per_project_class_archive_directory}/${padded_year}/${padded_year}-${padded_month}/unique_devices_per_project_class_monthly-${padded_year}-${padded_month}.gz</value>
                 </property>
             </configuration>
         </sub-workflow>
diff --git 
a/oozie/unique_devices/project_wide/druid/daily/generate_druid_unique_devices_project_wide_daily.hql
 
b/oozie/unique_devices/project_wide/druid/daily/generate_druid_unique_devices_project_wide_daily.hql
deleted file mode 100644
index ba892fa..0000000
--- 
a/oozie/unique_devices/project_wide/druid/daily/generate_druid_unique_devices_project_wide_daily.hql
+++ /dev/null
@@ -1,55 +0,0 @@
--- Extracts one day of json formatted daily unique devices
--- project-wide to be loaded in Druid
---
--- Usage:
---     hive -f generate_druid_unique_devices_project_wide_daily.hql \
---         -d source_table=wmf.unique_devices_project_wide_daily \
---         -d 
destination_directory=/tmp/druid/unique_devices_project_wide_daily_json \
---         -d year=2017 \
---         -d month=4 \
---         -d day=1
---
-
-SET hive.exec.compress.output=true;
-SET 
mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.GzipCodec;
-
-
-ADD JAR /usr/lib/hive-hcatalog/share/hcatalog/hive-hcatalog-core.jar;
-
-
-DROP TABLE IF EXISTS 
tmp_druid_unique_devices_project_wide_daily_${year}_${month}_${day};
-
-
-CREATE EXTERNAL TABLE IF NOT EXISTS 
tmp_druid_unique_devices_project_wide_daily_${year}_${month}_${day} (
-    `dt`                     string,
-    `project`                string,
-    `country`                string,
-    `country_code`           string,
-    `uniques_underestimate`  bigint,
-    `uniques_offset`         bigint,
-    `uniques_estimate`       bigint
-)
-ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
-STORED AS TEXTFILE
-LOCATION '${destination_directory}';
-
-
-INSERT OVERWRITE TABLE 
tmp_druid_unique_devices_project_wide_daily_${year}_${month}_${day}
-SELECT
-    CONCAT(
-        LPAD(year, 4, '0'), '-',
-        LPAD(month, 2, '0'), '-',
-        LPAD(day, 2, '0'), 'T00:00:00Z') AS dt,
-    project AS project,
-    country AS country,
-    country_code AS country_code,
-    uniques_underestimate AS uniques_underestimate,
-    uniques_offset AS uniques_offset,
-    uniques_estimate AS uniques_estimate
-FROM ${source_table}
-WHERE year = ${year}
-    AND month = ${month}
-    AND day = ${day};
-
-
-DROP TABLE IF EXISTS 
tmp_druid_unique_devices_project_wide_daily_${year}_${month}_${day};

-- 
To view, visit https://gerrit.wikimedia.org/r/360327
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If5298a9ce5ae03483b52fd6fb251a91fadf80abe
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Joal <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to