Fdans has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/394062 )
Change subject: [wip] Add pageview by country oozie jobs
......................................................................
[wip] Add pageview by country oozie jobs
Adds coordinators and hive scripts necessary to load pageviews by country
data into cassandra.
Bug: T181521
Change-Id: I35b7736c07c5f2346cecf6c388dd252632aab88c
---
M oozie/cassandra/bundle.xml
A oozie/cassandra/coord_pageviews_bycountry_daily.properties
A oozie/cassandra/coord_pageviews_bycountry_monthly.properties
M oozie/cassandra/coord_top_articles_daily.properties
A oozie/cassandra/daily/pageview_top_bycountry.hql
A oozie/cassandra/monthly/pageview_top_bycountry.sql
6 files changed, 593 insertions(+), 7 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery
refs/changes/62/394062/1
diff --git a/oozie/cassandra/bundle.xml b/oozie/cassandra/bundle.xml
index 0a4e656..99b8226 100644
--- a/oozie/cassandra/bundle.xml
+++ b/oozie/cassandra/bundle.xml
@@ -211,6 +211,59 @@
</property>
<!--**************************************************
+ Pageviews by country parameters
+ **************************************************-->
+
+ <property>
+ <name>top_bycountry_datasets_file</name>
+ <value>${pageview_datasets_file}</value>
+ </property>
+ <property>
+ <name>top_bycountry_dataset_name</name>
+ <value>pageview_hourly</value>
+ </property>
+ <property>
+ <name>top_bycountry_dataset_freq</name>
+ <value>hour</value>
+ </property>
+ <property>
+ <name>top_bycountry_hive_script</name>
+ <value>pageview_top_bycountry.hql</value>
+ </property>
+ <property>
+ <name>top_bycountry_source_table</name>
+ <value>wmf.pageview_hourly</value>
+ </property>
+ <property>
+ <name>top_bycountry_parallel_loaders</name>
+ <value>3</value>
+ </property>
+ <property>
+ <name>top_bycountry_keyspace</name>
+ <value>local_group_default_T_top_pageviews_bycountry</value>
+ </property>
+ <property>
+ <name>top_bycountry_cql</name>
+ <value>UPDATE "${top_bycountry_keyspace}"."data" SET
"countriesJSON" = ?</value>
+ </property>
+ <property>
+ <name>top_bycountry_hive_fields</name>
+ <value>project,year,month,day,countriesJSON</value>
+ </property>
+ <property>
+ <name>top_bycountry_hive_fields_types</name>
+ <value>text,text,text,text,text</value>
+ </property>
+ <property>
+ <name>top_bycountry_cassandra_fields</name>
+ <value>countriesJSON</value>
+ </property>
+ <property>
+ <name>top_bycountry_cassandra_primary_keys</name>
+ <value>_domain,project,year,month,day,_tid</value>
+ </property>
+
+ <!--**************************************************
Unique devices parameters
**************************************************-->
@@ -660,6 +713,142 @@
</configuration>
</coordinator>
+ <!--**************************************************
+ Pageviews by country coordinators
+ **************************************************-->
+
+ <coordinator name="cassandra-coord-pageviews-bycountry-daily">
+ <app-path>${coordinator_file_daily}</app-path>
+ <configuration>
+ <property>
+ <name>workflow_file</name>
+ <value>${workflow_file_daily}</value>
+ </property>
+
+ <!-- Dataset the job depends on -->
+ <property>
+ <name>datasets_file</name>
+ <value>${top_bycountry_datasets_file}</value>
+ </property>
+ <property>
+ <name>dataset_name</name>
+ <value>${top_bycountry_dataset_name}</value>
+ </property>
+ <!-- frequency of the dataset to manage different datasets
+ correctly in coordinator -->
+ <property>
+ <name>dataset_freq</name>
+ <value>${top_bycountry_dataset_freq}</value>
+ </property>
+
+ <!-- Hive data preparation job stuff -->
+ <property>
+ <name>hive_script</name>
+ <value>${top_bycountry_hive_script}</value>
+ </property>
+ <property>
+ <name>source_table</name>
+ <value>${top_bycountry_source_table}</value>
+ </property>
+
+ <!-- cassandra loader job stuff -->
+ <property>
+ <name>cassandra_parallel_loaders</name>
+ <value>${top_bycountry_parallel_loaders}</value>
+ </property>
+ <property>
+ <name>cassandra_cql</name>
+ <value>${top_bycountry_cql}</value>
+ </property>
+ <property>
+ <name>cassandra_keyspace</name>
+ <value>${top_bycountry_keyspace}</value>
+ </property>
+ <property>
+ <name>hive_fields</name>
+ <value>${top_bycountry_hive_fields}</value>
+ </property>
+ <property>
+ <name>hive_fields_types</name>
+ <value>${top_bycountry_hive_fields_types}</value>
+ </property>
+ <property>
+ <name>cassandra_fields</name>
+ <value>${top_bycountry_cassandra_fields}</value>
+ </property>
+ <property>
+ <name>cassandra_primary_keys</name>
+ <value>${top_bycountry_cassandra_primary_keys}</value>
+ </property>
+ </configuration>
+ </coordinator>
+
+ <coordinator name="cassandra-coord-pageviews-bycountry-monthly">
+ <app-path>${coordinator_file_monthly}</app-path>
+ <configuration>
+ <property>
+ <name>workflow_file</name>
+ <value>${workflow_file_monthly}</value>
+ </property>
+
+ <!-- Dataset the job depends on -->
+ <property>
+ <name>datasets_file</name>
+ <value>${top_bycountry_datasets_file}</value>
+ </property>
+ <property>
+ <name>dataset_name</name>
+ <value>${top_bycountry_dataset_name}</value>
+ </property>
+ <!-- frequency of the dataset to manage different datasets
+ correctly in coordinator -->
+ <property>
+ <name>dataset_freq</name>
+ <value>${top_bycountry_dataset_freq}</value>
+ </property>
+
+ <!-- Hive data preparation job stuff -->
+ <property>
+ <name>hive_script</name>
+ <value>${top_bycountry_hive_script}</value>
+ </property>
+ <property>
+ <name>source_table</name>
+ <value>${top_bycountry_source_table}</value>
+ </property>
+
+ <!-- cassandra loader job stuff -->
+ <property>
+ <name>cassandra_parallel_loaders</name>
+ <value>${top_bycountry_parallel_loaders}</value>
+ </property>
+ <property>
+ <name>cassandra_cql</name>
+ <value>${top_bycountry_cql}</value>
+ </property>
+ <property>
+ <name>cassandra_keyspace</name>
+ <value>${top_bycountry_keyspace}</value>
+ </property>
+ <property>
+ <name>hive_fields</name>
+ <value>${top_bycountry_hive_fields}</value>
+ </property>
+ <property>
+ <name>hive_fields_types</name>
+ <value>${top_bycountry_hive_fields_types}</value>
+ </property>
+ <property>
+ <name>cassandra_fields</name>
+ <value>${top_bycountry_cassandra_fields}</value>
+ </property>
+ <property>
+ <name>cassandra_primary_keys</name>
+ <value>${top_bycountry_cassandra_primary_keys}</value>
+ </property>
+ </configuration>
+ </coordinator>
+
<!--**************************************************
Unique devices coordinators
diff --git a/oozie/cassandra/coord_pageviews_bycountry_daily.properties
b/oozie/cassandra/coord_pageviews_bycountry_daily.properties
new file mode 100644
index 0000000..89f195a
--- /dev/null
+++ b/oozie/cassandra/coord_pageviews_bycountry_daily.properties
@@ -0,0 +1,99 @@
+# Configures a coordinator to manage loading cassandra for the top countries
daily
+# pageview API. Any of the following properties are overridable with -D.
+# Usage:
+# oozie job -Duser=$USER -Dstart_time=2015-05-05T00:00Z -submit -config
oozie/cassandra/coord_pageviews_bycountry_daily.properties
+#
+# NOTE: The $oozie_directory must be synced to HDFS so that all relevant
+# .xml files exist there when this job is submitted.
+
+
+name_node = hdfs://analytics-hadoop
+job_tracker = resourcemanager.analytics.eqiad.wmnet:8032
+queue_name = default
+
+user = hdfs
+
+# Base path in HDFS to refinery.
+# When submitting this job for production, you should
+# override this to point directly at a deployed
+# directory name, and not the 'symbolic' 'current' directory.
+# E.g. /wmf/refinery/2015-01-05T17.59.18Z--7bb7f07
+refinery_directory = ${name_node}/wmf/refinery/current
+
+# HDFS path to the refinery job jar that will be used by this job.
+refinery_cassandra_jar_path =
${refinery_directory}/artifacts/org/wikimedia/analytics/refinery/refinery-cassandra-0.0.35.jar
+cassandra_reducer_class =
org.wikimedia.analytics.refinery.cassandra.ReducerToCassandra
+cassandra_output_format_class =
org.wikimedia.analytics.refinery.cassandra.CqlOutputFormat
+
+# Base path in HDFS to oozie files.
+# Other files will be used relative to this path.
+oozie_directory = ${refinery_directory}/oozie
+
+# HDFS path to coordinators to run.
+coordinator_file_hourly =
${oozie_directory}/cassandra/hourly/coordinator.xml
+coordinator_file_daily =
${oozie_directory}/cassandra/daily/coordinator.xml
+coordinator_file_monthly =
${oozie_directory}/cassandra/monthly/coordinator.xml
+
+# HDFS path to workflows to run.
+workflow_file_hourly =
${oozie_directory}/cassandra/hourly/workflow.xml
+workflow_file_daily =
${oozie_directory}/cassandra/daily/workflow.xml
+workflow_file_monthly =
${oozie_directory}/cassandra/monthly/workflow.xml
+
+# Workflow to send an error email
+send_error_email_workflow_file =
${oozie_directory}/util/send_error_email/workflow.xml
+
+# HDFS path to datasets definitions
+pageview_datasets_file = ${oozie_directory}/pageview/datasets.xml
+projectview_datasets_file = ${oozie_directory}/projectview/datasets.xml
+pageview_data_directory = ${name_node}/wmf/data/wmf/pageview
+projectview_data_directory = ${name_node}/wmf/data/wmf/projectview
+dataset_freq = hour
+
+# Initial import time of the webrequest dataset.
+start_time = 2015-05-01T00:00Z
+
+# Time to stop running this coordinator. Year 3000 == never!
+stop_time = 3000-01-01T00:00Z
+
+# HDFS path to hive-site.xml file. This is needed to run hive actions.
+hive_site_xml = ${name_node}/user/hive/hive-site.xml
+# Temporary directory
+temporary_directory = ${name_node}/tmp
+
+# Cassandra cluster info
+cassandra_host = aqs1004-a.eqiad.wmnet
+cassandra_port = 9042
+cassandra_username = aqsloader
+cassandra_password = cassandra
+cassandra_nodes = 6
+batch_size = 1024
+
+# Hive value separator
+hive_value_separator = \\t
+# Cassandra table to be loaded (not job dependant)
+cassandra_table = data
+
+# Constant field names and value to be loaded into cassandra
+constant_output_domain_field = _domain
+constant_output_domain_value = analytics.wikimedia.org,text
+constant_output_granularity_field = granularity
+constant_output_tid_field = _tid
+constant_output_tid_value = 0,timeuuid
+
+workflow_file = ${workflow_file_daily}
+datasets_file = ${pageview_datasets_file}
+dataset_name = pageview_hourly
+hive_script = pageview_top_bycountry.hql
+source_table = wmf.pageview_hourly
+cassandra_parallel_loaders = 6
+cassandra_keyspace =
local_group_default_T_top_pageviews_bycountry
+cassandra_cql = UPDATE "${cassandra_keyspace}"."data" SET
"countriesJSON" = ?
+hive_fields = project,year,month,day,countriesJSON
+hive_fields_types = text,text,text,text,text
+cassandra_fields = countriesJSON
+cassandra_primary_keys = _domain,project,year,month,day,_tid
+
+# Coordinator to start.
+oozie.coord.application.path = ${coordinator_file_daily}
+oozie.use.system.libpath = true
+oozie.action.external.stats.write = true
\ No newline at end of file
diff --git a/oozie/cassandra/coord_pageviews_bycountry_monthly.properties
b/oozie/cassandra/coord_pageviews_bycountry_monthly.properties
new file mode 100644
index 0000000..01ee58a
--- /dev/null
+++ b/oozie/cassandra/coord_pageviews_bycountry_monthly.properties
@@ -0,0 +1,101 @@
+# Configures a coordinator to manage loading cassandra for the top countries
monthly
+# pageview API. Any of the following properties are overridable with -D.
+# Usage:
+# oozie job -Duser=$USER -Dstart_time=2015-05-05T00:00Z -submit -config
oozie/cassandra/coord_pageviews_bycountry_monthly.properties
+#
+# NOTE: The $oozie_directory must be synced to HDFS so that all relevant
+# .xml files exist there when this job is submitted.
+
+
+name_node = hdfs://analytics-hadoop
+job_tracker = resourcemanager.analytics.eqiad.wmnet:8032
+queue_name = default
+
+user = hdfs
+
+# Base path in HDFS to refinery.
+# When submitting this job for production, you should
+# override this to point directly at a deployed
+# directory name, and not the 'symbolic' 'current' directory.
+# E.g. /wmf/refinery/2015-01-05T17.59.18Z--7bb7f07
+refinery_directory = ${name_node}/wmf/refinery/current
+
+# HDFS path to the refinery job jar that will be used by this job.
+refinery_cassandra_jar_path =
${refinery_directory}/artifacts/org/wikimedia/analytics/refinery/refinery-cassandra-0.0.35.jar
+cassandra_reducer_class =
org.wikimedia.analytics.refinery.cassandra.ReducerToCassandra
+cassandra_output_format_class =
org.wikimedia.analytics.refinery.cassandra.CqlOutputFormat
+
+# Base path in HDFS to oozie files.
+# Other files will be used relative to this path.
+oozie_directory = ${refinery_directory}/oozie
+
+# HDFS path to coordinators to run.
+coordinator_file_hourly =
${oozie_directory}/cassandra/hourly/coordinator.xml
+coordinator_file_daily =
${oozie_directory}/cassandra/daily/coordinator.xml
+coordinator_file_monthly =
${oozie_directory}/cassandra/monthly/coordinator.xml
+
+# HDFS path to workflows to run.
+workflow_file_hourly =
${oozie_directory}/cassandra/hourly/workflow.xml
+workflow_file_daily =
${oozie_directory}/cassandra/daily/workflow.xml
+workflow_file_monthly =
${oozie_directory}/cassandra/monthly/workflow.xml
+
+# Workflow to send an error email
+send_error_email_workflow_file =
${oozie_directory}/util/send_error_email/workflow.xml
+
+# HDFS path to datasets definitions
+pageview_datasets_file = ${oozie_directory}/pageview/datasets.xml
+projectview_datasets_file = ${oozie_directory}/projectview/datasets.xml
+pageview_data_directory = ${name_node}/wmf/data/wmf/pageview
+projectview_data_directory = ${name_node}/wmf/data/wmf/projectview
+# provides the frequency of the dataset used to manage
+#different datasets correctly in coordinator
+dataset_freq = hour
+
+# Initial import time of the webrequest dataset.
+start_time = 2015-05-01T00:00Z
+
+# Time to stop running this coordinator. Year 3000 == never!
+stop_time = 3000-01-01T00:00Z
+
+# HDFS path to hive-site.xml file. This is needed to run hive actions.
+hive_site_xml = ${name_node}/user/hive/hive-site.xml
+# Temporary directory
+temporary_directory = ${name_node}/tmp
+
+# Cassandra cluster info
+cassandra_host = aqs1004-a.eqiad.wmnet
+cassandra_port = 9042
+cassandra_username = aqsloader
+cassandra_password = cassandra
+cassandra_nodes = 6
+batch_size = 1024
+
+# Hive value separator
+hive_value_separator = \\t
+# Cassandra table to be loaded (not job dependant)
+cassandra_table = data
+
+# Constant field names and value to be loaded into cassandra
+constant_output_domain_field = _domain
+constant_output_domain_value = analytics.wikimedia.org,text
+constant_output_granularity_field = granularity
+constant_output_tid_field = _tid
+constant_output_tid_value = 0,timeuuid
+
+workflow_file = ${workflow_file_monthly}
+datasets_file = ${pageview_datasets_file}
+dataset_name = pageview_hourly
+hive_script = pageview_top_bycountry.hql
+source_table = wmf.pageview_hourly
+cassandra_parallel_loaders = 6
+cassandra_keyspace =
local_group_default_T_top_pageviews_bycountry
+cassandra_cql = UPDATE "${cassandra_keyspace}"."data" SET
"countriesJSON" = ?
+hive_fields = project,year,month,day,countriesJSON
+hive_fields_types = text,text,text,text,text
+cassandra_fields = countriesJSON
+cassandra_primary_keys = _domain,project,year,month,day,_tid
+
+# Coordinator to start.
+oozie.coord.application.path = ${coordinator_file_monthly}
+oozie.use.system.libpath = true
+oozie.action.external.stats.write = true
\ No newline at end of file
diff --git a/oozie/cassandra/coord_top_articles_daily.properties
b/oozie/cassandra/coord_top_articles_daily.properties
index 0c4c7c1..5d41246 100644
--- a/oozie/cassandra/coord_top_articles_daily.properties
+++ b/oozie/cassandra/coord_top_articles_daily.properties
@@ -83,15 +83,15 @@
workflow_file = ${workflow_file_daily}
datasets_file = ${pageview_datasets_file}
dataset_name = pageview_hourly
-hive_script = pageview_top_articles.hql
+hive_script = pageview_top_countries.hql
source_table = wmf.pageview_hourly
cassandra_parallel_loaders = 6
-cassandra_keyspace = local_group_default_T_top_pageviews
-cassandra_cql = UPDATE "${cassandra_keyspace}"."data" SET
"articlesJSON" = ?
-hive_fields = project,access,year,month,day,articlesJSON
-hive_fields_types = text,text,text,text,text,text
-cassandra_fields = articlesJSON
-cassandra_primary_keys = _domain,project,access,year,month,day,_tid
+cassandra_keyspace =
local_group_default_T_top_pageviews_bycountry
+cassandra_cql = UPDATE "${cassandra_keyspace}"."data" SET
"countriesJSON" = ?
+hive_fields = project,year,month,day,countriesJSON
+hive_fields_types = text,text,text,text,text
+cassandra_fields = countriesJSON
+cassandra_primary_keys = _domain,project,year,month,day,_tid
# Coordintator to start.
oozie.coord.application.path = ${coordinator_file_daily}
diff --git a/oozie/cassandra/daily/pageview_top_bycountry.hql
b/oozie/cassandra/daily/pageview_top_bycountry.hql
new file mode 100644
index 0000000..72aaa48
--- /dev/null
+++ b/oozie/cassandra/daily/pageview_top_bycountry.hql
@@ -0,0 +1,102 @@
+-- Computes the daily top 1000 countries by summed pageviews for each
+-- project and serializes them as one JSON array per
+-- (project, year, month, day) line, in the separator-delimited format
+-- expected by the cassandra loader workflow.
+--
+-- Parameters:
+-- destination_directory -- HDFS path to write output files
+-- source_table -- Fully qualified table name to compute from.
+-- separator -- Separator for values
+-- year -- year of partition to compute from.
+-- month -- month of partition to compute from.
+-- day -- day of partition to compute from.
+--
+-- Usage:
+-- hive -f pageview_top_bycountry.hql \
+-- -d destination_directory=/tmp/pageview_top_bycountry \
+-- -d source_table=wmf.pageview_hourly \
+-- -d separator=\t \
+-- -d year=2015 \
+-- -d month=5 \
+-- -d day=1
+--
+
+
+SET hive.exec.compress.output=true;
+SET
mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.GzipCodec;
+
+
+-- Sum hourly view_count into daily per-country totals, then rank
+-- countries within each (project, year, month, day) group. rank() is
+-- used for the exported "rank" value (ties share a rank); row_number()
+-- gives the strict row count used for the top-1000 cutoff below.
+WITH ranked AS (
+ SELECT
+ project,
+ country,
+ year,
+ month,
+ day,
+ views,
+ rank() OVER (PARTITION BY project, year, month, day ORDER BY views
DESC) as rank,
+ row_number() OVER (PARTITION BY project, year, month, day ORDER BY
views DESC) as rn
+ FROM (
+ SELECT
+ project,
+ country,
+ LPAD(year, 4, "0") as year,
+ LPAD(month, 2, "0") as month,
+ LPAD(day, 2, "0") as day,
+ SUM(view_count) as views
+ FROM ${source_table}
+ WHERE
+ year = ${year}
+ AND month = ${month}
+ AND day = ${day}
+ -- Remove special unknown pageview (see T117346)
+ AND page_title != '-'
+ GROUP BY project, country, year, month, day
+ GROUPING SETS (
+ (project, country, year, month, day)
+ )
+ ) raw
+),
+-- Rank value found at row 1001 of each group: used below as the cutoff
+-- so that rank-ties spanning the 1000-row boundary are dropped
+-- consistently rather than truncated arbitrarily.
+max_rank AS (
+ SELECT
+ project,
+ year,
+ month,
+ day,
+ rank as max_rank
+ FROM ranked
+ WHERE
+ rn = 1001
+ GROUP BY
+ project,
+ year,
+ month,
+ day,
+ rank
+)
+INSERT OVERWRITE DIRECTORY "${destination_directory}"
+-- Since "ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '" only
+-- works for exports to local directories (see HIVE-5672), we have to
+-- prepare the lines by hand through concatenation :-(
+SELECT
+ CONCAT_WS("${separator}",
+ ranked.project,
+ ranked.year,
+ ranked.month,
+ ranked.day,
+ CONCAT('[',
+ CONCAT_WS(',', collect_set(
+ CONCAT('{"country":"', ranked.country,
+ '","views":', CAST(ranked.views AS STRING),
+ ',"rank":', CAST(ranked.rank AS STRING), '}'))
+ ),']')
+ )
+FROM ranked
+LEFT JOIN max_rank ON (
+ ranked.project = max_rank.project
+ AND ranked.year = max_rank.year
+ AND ranked.month = max_rank.month
+ AND ranked.day = max_rank.day
+)
+-- Groups with 1000 rows or fewer have no row 1001, so max_rank is NULL
+-- and the COALESCE default of 1001 keeps every row.
+WHERE ranked.rank < COALESCE(max_rank.max_rank, 1001)
+GROUP BY
+ ranked.project,
+ ranked.year,
+ ranked.month,
+ ranked.day
+;
\ No newline at end of file
diff --git a/oozie/cassandra/monthly/pageview_top_bycountry.sql
b/oozie/cassandra/monthly/pageview_top_bycountry.sql
new file mode 100644
index 0000000..05ed92b
--- /dev/null
+++ b/oozie/cassandra/monthly/pageview_top_bycountry.sql
@@ -0,0 +1,95 @@
+-- Computes the monthly top 1000 countries by summed pageviews for each
+-- project and serializes them as one JSON array per (project, year, month)
+-- line, in the separator-delimited format expected by the cassandra
+-- loader workflow. The day field is emitted as the constant 'all-days'.
+--
+-- NOTE(review): this file is named .sql while the daily job's script is
+-- .hql -- consider renaming for consistency.
+--
+-- Parameters:
+-- destination_directory -- HDFS path to write output files
+-- source_table -- Fully qualified table name to compute from.
+-- separator -- Separator for values
+-- year -- year of partition to compute from.
+-- month -- month of partition to compute from.
+--
+-- Usage:
+-- hive -f pageview_top_bycountry.sql \
+-- -d destination_directory=/tmp/pageview_top_bycountry \
+-- -d source_table=wmf.pageview_hourly \
+-- -d separator=\t \
+-- -d year=2015 \
+-- -d month=5
+--
+
+
+SET hive.exec.compress.output=true;
+SET mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.GzipCodec;
+
+
+-- Sum hourly view_count into monthly per-country totals, then rank
+-- countries within each (project, year, month) group. rank() is used
+-- for the exported "rank" value (ties share a rank); row_number() gives
+-- the strict row count used for the top-1000 cutoff below.
+-- Fixed: the windows previously partitioned by 'access' as well, a
+-- leftover from the top_articles script; 'access' is not selected by
+-- the inner query, so the query could not run.
+WITH ranked AS (
+    SELECT
+        project,
+        country,
+        year,
+        month,
+        views,
+        rank() OVER (PARTITION BY project, year, month ORDER BY views DESC) as rank,
+        row_number() OVER (PARTITION BY project, year, month ORDER BY views DESC) as rn
+    FROM (
+        SELECT
+            project,
+            country,
+            LPAD(year, 4, "0") as year,
+            LPAD(month, 2, "0") as month,
+            SUM(view_count) as views
+        FROM ${source_table}
+        WHERE
+            year = ${year}
+            AND month = ${month}
+            -- Remove special unknown pageview (see T117346)
+            AND page_title != '-'
+        GROUP BY project, country, year, month
+        GROUPING SETS (
+            (project, country, year, month)
+        )
+    ) raw
+),
+-- Rank value found at row 1001 of each group: used below as the cutoff
+-- so that rank-ties spanning the 1000-row boundary are dropped
+-- consistently rather than truncated arbitrarily.
+max_rank AS (
+    SELECT
+        project,
+        year,
+        month,
+        rank as max_rank
+    FROM ranked
+    WHERE
+        rn = 1001
+    GROUP BY
+        project,
+        year,
+        month,
+        rank
+)
+INSERT OVERWRITE DIRECTORY "${destination_directory}"
+-- Since "ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '" only
+-- works for exports to local directories (see HIVE-5672), we have to
+-- prepare the lines by hand through concatenation :-(
+SELECT
+    CONCAT_WS("${separator}",
+        ranked.project,
+        ranked.year,
+        ranked.month,
+        -- Constant day field for monthly granularity.
+        'all-days',
+        CONCAT('[',
+            CONCAT_WS(',', collect_set(
+                -- Fixed: was ranked.page_title (copy-paste from the
+                -- top_articles script); this job groups by country.
+                CONCAT('{"country":"', ranked.country,
+                    '","views":', CAST(ranked.views AS STRING),
+                    ',"rank":', CAST(ranked.rank AS STRING), '}'))
+            ),']')
+    )
+FROM ranked
+LEFT JOIN max_rank ON (
+    ranked.project = max_rank.project
+    AND ranked.year = max_rank.year
+    AND ranked.month = max_rank.month
+)
+-- Groups with 1000 rows or fewer have no row 1001, so max_rank is NULL
+-- and the COALESCE default of 1001 keeps every row.
+WHERE ranked.rank < COALESCE(max_rank.max_rank, 1001)
+GROUP BY
+    ranked.project,
+    ranked.year,
+    ranked.month
+;
\ No newline at end of file
--
To view, visit https://gerrit.wikimedia.org/r/394062
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I35b7736c07c5f2346cecf6c388dd252632aab88c
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Fdans <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits