Hello Ottomata, jenkins-bot,

I'd like you to do a code review.  Please visit

    https://gerrit.wikimedia.org/r/335648

to review the following change.


Change subject: Revert "Add JVM Heap usage alarms for basic Hadoop daemons"
......................................................................

Revert "Add JVM Heap usage alarms for basic Hadoop daemons"

This reverts commit 4735b0f002a7ac5269186fd8399231cdcc5c2253.

Change-Id: I0843e88286cdcb3d7ff41fc844c93920ac1f2fb0
---
M hieradata/eqiad/cdh/hadoop.yaml
M modules/role/manifests/analytics_cluster/hadoop/master.pp
M modules/role/manifests/analytics_cluster/hadoop/standby.pp
M modules/role/manifests/analytics_cluster/hadoop/worker.pp
4 files changed, 1 insertion(+), 87 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/48/335648/1

diff --git a/hieradata/eqiad/cdh/hadoop.yaml b/hieradata/eqiad/cdh/hadoop.yaml
index 04e1baf..39c9879 100644
--- a/hieradata/eqiad/cdh/hadoop.yaml
+++ b/hieradata/eqiad/cdh/hadoop.yaml
@@ -32,8 +32,6 @@
 cdh::hadoop::net_topology_script_template: 'role/analytics_cluster/hadoop/net-topology.py.erb'
 
 # Increase NameNode heapsize independent from other daemons
-# The opts value will be used for the JVM, meanwhile the raw number for alarms.
-cdh::hadoop::hadoop_namenode_heapsize: 4096
 cdh::hadoop::hadoop_namenode_opts: "-Xmx4096m"
 
 cdh::hadoop::mapreduce_reduce_shuffle_parallelcopies: 10
diff --git a/modules/role/manifests/analytics_cluster/hadoop/master.pp b/modules/role/manifests/analytics_cluster/hadoop/master.pp
index 4e976a4..d30450c 100644
--- a/modules/role/manifests/analytics_cluster/hadoop/master.pp
+++ b/modules/role/manifests/analytics_cluster/hadoop/master.pp
@@ -90,32 +90,6 @@
                 Sudo::User['nagios-check_hdfs_active_namenode'],
             ],
         }
-
-        # Java heap space used alerts
-        # The goal is to get alarms for long running memory leaks like T153951
-        $namenode_jvm_warning_threshold  = 
hiera(cdh::hadoop::hadoop_namenode_heapsize) * 0.7
-        $namenode_jvm_critical_threshold = 
hiera(cdh::hadoop::hadoop_namenode_heapsize) * 0.9
-        monitoring::graphite_threshold { 'analytics_hadoop_hdfs_namenode':
-            description   => 'HDFS active Namenode JVM Heap usage',
-            metric        => 
"Hadoop.NameNode.${::hostname}_eqiad_wmnet_9980.Hadoop.NameNode.JvmMetrics.MemHeapUsedM.upper",
-            from          => '60min',
-            warning       => $namenode_jvm_warning_threshold,
-            critical      => $namenode_jvm_critical_threshold,
-            percentage    => '60',
-            contact_group => 'admins,analytics',
-        }
-
-        $rm_jvm_warning_threshold  = hiera(cdh::hadoop::yarn_heapsize) * 0.7
-        $rm_jvm_critical_threshold = hiera(cdh::hadoop::yarn_heapsize) * 0.9
-        monitoring::graphite_threshold { 
'analytics_hadoop_yarn_resource_manager':
-            description   => 'Yarn active ResourceManager JVM Heap usage',
-            metric        => 
"Hadoop.ResourceManager.${::hostname}_eqiad_wmnet_9980.Hadoop.ResourceManager.JvmMetrics.MemHeapUsedM.upper",
-            from          => '60min',
-            warning       => $rm_jvm_warning_threshold,
-            critical      => $rm_jvm_critical_threshold,
-            percentage    => '60',
-            contact_group => 'admins,analytics',
-        }
     }
 
     # Firewall
diff --git a/modules/role/manifests/analytics_cluster/hadoop/standby.pp b/modules/role/manifests/analytics_cluster/hadoop/standby.pp
index 3d30dfe..21c0455 100644
--- a/modules/role/manifests/analytics_cluster/hadoop/standby.pp
+++ b/modules/role/manifests/analytics_cluster/hadoop/standby.pp
@@ -31,20 +31,6 @@
             contact_group => 'admins,analytics',
             require       => Class['cdh::hadoop::namenode::standby'],
         }
-
-        # Java heap space used alerts
-        # The goal is to get alarms for long running memory leaks like T153951
-        $namenode_jvm_warning_threshold  = 
hiera(cdh::hadoop::hadoop_namenode_heapsize) * 0.7
-        $namenode_jvm_critical_threshold = 
hiera(cdh::hadoop::hadoop_namenode_heapsize) * 0.9
-        monitoring::graphite_threshold { 'analytics_hadoop_namenode_hdfs':
-            description   => 'HDFS standby Namenode JVM Heap usage',
-            metric        => 
"Hadoop.NameNode.${::hostname}_eqiad_wmnet_9980.Hadoop.NameNode.JvmMetrics.MemHeapUsedM.upper",
-            from          => '60min',
-            warning       => $namenode_jvm_warning_threshold,
-            critical      => $namenode_jvm_critical_threshold,
-            percentage    => '60',
-            contact_group => 'admins,analytics',
-        }
     }
 
     # Firewall
@@ -57,25 +43,6 @@
         include ::cdh::hadoop::resourcemanager
         # Firewall
         include ::role::analytics_cluster::hadoop::ferm::resourcemanager
-
-        # Use jmxtrans for sending metrics
-        class { 'cdh::hadoop::jmxtrans::resourcemanager':
-            statsd  => hiera('statsd'),
-        }
-
-        # Java heap space used alerts
-        # The goal is to get alarms for long running memory leaks like T153951
-        $rm_jvm_warning_threshold  = hiera(cdh::hadoop::yarn_heapsize) * 0.7
-        $rm_jvm_critical_threshold = hiera(cdh::hadoop::yarn_heapsize) * 0.9
-        monitoring::graphite_threshold { 
'analytics_hadoop_yarn_resource_manager':
-            description   => 'YARN Resource Manager JVM Heap usage',
-            metric        => 
"Hadoop.ResourceManager.${::hostname}_eqiad_wmnet_9984.Hadoop.ResourceManager.JvmMetrics.MemHeapUsedM.upper",
-            from          => '60min',
-            warning       => $rm_jvm_warning_threshold,
-            critical      => $rm_jvm_critical_threshold,
-            percentage    => '60',
-            contact_group => 'admins,analytics',
-        }
     }
 
-}
+}
\ No newline at end of file
diff --git a/modules/role/manifests/analytics_cluster/hadoop/worker.pp b/modules/role/manifests/analytics_cluster/hadoop/worker.pp
index bda0a67..9030b91 100644
--- a/modules/role/manifests/analytics_cluster/hadoop/worker.pp
+++ b/modules/role/manifests/analytics_cluster/hadoop/worker.pp
@@ -54,31 +54,6 @@
             contact_group  => 'admins,analytics',
             retry_interval => 3,
         }
-
-        # Java heap space used alerts
-        # The goal is to get alarms for long running memory leaks like T153951
-        $dn_jvm_warning_threshold  = hiera(cdh::hadoop::hadoop_heapsize) * 0.7
-        $dn_jvm_critical_threshold = hiera(cdh::hadoop::hadoop_heapsize) * 0.9
-        $nm_jvm_warning_threshold  = hiera(cdh::hadoop::yarn_heapsize) * 0.7
-        $nm_jvm_critical_threshold = hiera(cdh::hadoop::yarn_heapsize) * 0.9
-        monitoring::graphite_threshold { 'analytics_hadoop_yarn_nodemanager':
-            description   => 'YARN NodeManager JVM Heap usage',
-            metric        => 
"Hadoop.NodeManager.${::hostname}_eqiad_wmnet_9984.Hadoop.NodeManager.JvmMetrics.MemHeapUsedM.upper",
-            from          => '60min',
-            warning       => $dn_jvm_warning_threshold,
-            critical      => $dn_jvm_critical_threshold,
-            percentage    => '60',
-            contact_group => 'admins,analytics',
-        }
-        monitoring::graphite_threshold { 'analytics_hadoop_hdfs_datanode':
-            description   => 'HDFS DataNode JVM Heap usage',
-            metric        => 
"Hadoop.DataNode.${::hostname}_eqiad_wmnet_9981.Hadoop.DataNode.JvmMetrics.MemHeapUsedM.upper",
-            from          => '60min',
-            warning       => $nm_jvm_warning_threshold,
-            critical      => $nm_jvm_critical_threshold,
-            percentage    => '60',
-            contact_group => 'admins,analytics',
-        }
     }
 
     # hive::client is nice to have for jobs launched

-- 
To view, visit https://gerrit.wikimedia.org/r/335648
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I0843e88286cdcb3d7ff41fc844c93920ac1f2fb0
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Elukey <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to