Joal has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/349266 )

Change subject: Update restbase oozie spark job
......................................................................


Update restbase oozie spark job

This job was created before Spark supported dynamic allocation.
This patch removes the fixed number of executors and enables
dynamic allocation for this job.

Bug: T163479
Change-Id: Icf5b764ce3e8906707ff1958bcdbe1a094841c4c
---
M oozie/restbase/README.md
M oozie/restbase/coordinator.properties
M oozie/restbase/coordinator.xml
M oozie/restbase/workflow.xml
4 files changed, 9 insertions(+), 8 deletions(-)

Approvals:
  Mforns: Looks good to me, but someone else must approve
  Ottomata: Looks good to me, but someone else must approve
  Joal: Verified; Looks good to me, approved



diff --git a/oozie/restbase/README.md b/oozie/restbase/README.md
index 6f33e88..d8142f7 100644
--- a/oozie/restbase/README.md
+++ b/oozie/restbase/README.md
@@ -10,7 +10,7 @@
     oozie job -run \
          -config coordinator.properties \
          -D 
refinery_directory=hdfs://analytics-hadoop/user/madhuvishy/refinery \
-         -D 
spark_job_jar=hdfs://analytics-hadoop/user/madhuvishy/source/refinery-job-0.0.18-SNAPSHOT.jar
+         -D 
spark_job_jar=hdfs://analytics-hadoop/user/madhuvishy/source/refinery-job-0.0.45-SNAPSHOT.jar
 
 The results of this job can be viewed in Graphite (graphite.wikimedia.org) 
under the restbase namespace,
 in restbase.requests.varnish_requests.
\ No newline at end of file
diff --git a/oozie/restbase/coordinator.properties 
b/oozie/restbase/coordinator.properties
index 9b5997e..08b7d93 100644
--- a/oozie/restbase/coordinator.properties
+++ b/oozie/restbase/coordinator.properties
@@ -49,9 +49,9 @@
 spark_job_jar                     = 
${artifacts_directory}/org/wikimedia/analytics/refinery/refinery-job-0.0.41.jar
 spark_job_class                   = 
org.wikimedia.analytics.refinery.job.RESTBaseMetrics
 spark_job_name                    = restbase_metrics
-spark_number_executors            = 2
-spark_executor_memory             = 1G
-spark_driver_memory               = 2G
+spark_executor_memory             = 2G
+spark_driver_memory               = 4G
+spark_max_executors               = 16
 graphite_namespace                = restbase.requests
 graphite_host                     = graphite-in.eqiad.wmnet
 graphite_port                     = 2003
diff --git a/oozie/restbase/coordinator.xml b/oozie/restbase/coordinator.xml
index d2d777b..0a150e6 100644
--- a/oozie/restbase/coordinator.xml
+++ b/oozie/restbase/coordinator.xml
@@ -24,7 +24,7 @@
         <property><name>spark_job_class</name></property>
         <property><name>spark_executor_memory</name></property>
         <property><name>spark_driver_memory</name></property>
-        <property><name>spark_number_executors</name></property>
+        <property><name>spark_max_executors</name></property>
         <property><name>graphite_host</name></property>
         <property><name>graphite_port</name></property>
         <property><name>graphite_namespace</name></property>
diff --git a/oozie/restbase/workflow.xml b/oozie/restbase/workflow.xml
index 4f74856..2ec0dd5 100644
--- a/oozie/restbase/workflow.xml
+++ b/oozie/restbase/workflow.xml
@@ -40,8 +40,8 @@
             <description>Memory to allocate for spark driver 
process</description>
         </property>
         <property>
-            <name>spark_number_executors</name>
-            <description>Number of executors to run in parallel</description>
+            <name>spark_max_executors</name>
+            <description>Maximum concurrent number of executors for spark 
dynamic allocation</description>
         </property>
         <property>
             <name>year</name>
@@ -104,7 +104,8 @@
             <name>${spark_job_name}-${year}-${month}-${day}-${hour}</name>
             <class>${spark_job_class}</class>
             <jar>${spark_job_jar}</jar>
-            <spark-opts>--conf spark.yarn.jar=${spark_assembly_jar} 
--executor-memory ${spark_executor_memory} --driver-memory 
${spark_driver_memory} --num-executors ${spark_number_executors} --queue 
${queue_name} </spark-opts>
+            <spark-opts>--conf spark.yarn.jar=${spark_assembly_jar} 
--executor-memory ${spark_executor_memory} --driver-memory 
${spark_driver_memory} --queue ${queue_name} --conf 
spark.dynamicAllocation.enabled=true --conf spark.shuffle.service.enabled=true
+            --conf 
spark.dynamicAllocation.maxExecutors=${spark_max_executors}</spark-opts>
             <arg>--year</arg>
             <arg>${year}</arg>
             <arg>--month</arg>

-- 
To view, visit https://gerrit.wikimedia.org/r/349266
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Icf5b764ce3e8906707ff1958bcdbe1a094841c4c
Gerrit-PatchSet: 2
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Joal <j...@wikimedia.org>
Gerrit-Reviewer: Joal <j...@wikimedia.org>
Gerrit-Reviewer: Mforns <mfo...@wikimedia.org>
Gerrit-Reviewer: Nuria <nu...@wikimedia.org>
Gerrit-Reviewer: Ottomata <ao...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to