http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/metainfo.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/metainfo.xml b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/metainfo.xml new file mode 100644 index 0000000..a41e261 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/metainfo.xml @@ -0,0 +1,163 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<metainfo> + <schemaVersion>2.0</schemaVersion> + <services> + <service> + <name>NAGIOS</name> + <displayName>Nagios</displayName> + <comment>Nagios Monitoring and Alerting system</comment> + <version>3.5.0</version> + <components> + <component> + <name>NAGIOS_SERVER</name> + <displayName>Nagios Server</displayName> + <category>MASTER</category> + <cardinality>1</cardinality> + <dependencies> + <dependency> + <name>HDFS/HDFS_CLIENT</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + <dependency> + <name>MAPREDUCE2/MAPREDUCE2_CLIENT</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + <dependency> + <name>OOZIE/OOZIE_CLIENT</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + <dependency> + <name>YARN/YARN_CLIENT</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + <dependency> + <name>HIVE/HCAT</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + </dependencies> + <commandScript> + <script>scripts/nagios_server.py</script> + <scriptType>PYTHON</scriptType> + <timeout>600</timeout> + </commandScript> + </component> + </components> + <osSpecifics> + <osSpecific> + <osFamily>any</osFamily> + <packages> + <package> + <name>perl</name> + </package> + <package> + <name>fping</name> + </package> + </packages> + </osSpecific> + <osSpecific> + <osFamily>ubuntu12</osFamily> + <packages> + <package> + <name>nagios3</name> + </package> + <package> + <name>nagios3-common</name> + </package> + <package> + <name>nagios3-dbg</name> + </package> + <package> + <name>nagios3-doc</name> + </package> + <package> + <name>nagios-plugins-extra</name> + </package> + <package> + <name>php5-curl</name> + </package> + <package> + <name>libapache2-mod-php5</name> + </package> + </packages> + </osSpecific> + <osSpecific> + 
<osFamily>redhat5,redhat6,suse11</osFamily> + <packages> + <package> + <name>nagios-plugins-1.4.9</name> + </package> + <package> + <name>nagios-3.5.0-99</name> + </package> + <package> + <name>nagios-www-3.5.0-99</name> + </package> + <package> + <name>nagios-devel-3.5.0-99</name> + </package> + <package> + <name>php</name> + </package> + </packages> + </osSpecific> + <osSpecific> + <osFamily>suse11</osFamily> + <packages> + <package> + <name>php5*-json</name> + </package> + <package> + <name>apache2?mod_php*</name> + </package> + <package> + <name>php-curl</name> + </package> + </packages> + </osSpecific> + <osSpecific> + <osFamily>redhat5</osFamily> + <packages> + <package> + <name>php-pecl-json.x86_64</name> + </package> + </packages> + </osSpecific> + </osSpecifics> + <configuration-dependencies> + <config-type>nagios-env</config-type> + </configuration-dependencies> + <monitoringService>true</monitoringService> + </service> + </services> +</metainfo>
http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_aggregate.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_aggregate.php b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_aggregate.php new file mode 100644 index 0000000..792b25b --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_aggregate.php @@ -0,0 +1,248 @@
<?php
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Aggregate check over a Nagios status file.
 *
 * Counts the hoststatus or servicestatus objects found in the status file,
 * works out which percentage of them is in the requested state and compares
 * that percentage with the warning / critical thresholds.
 *
 * Options:
 *   -f  path of the status file to read
 *   -t  "service" aggregates servicestatus objects; any other value
 *       aggregates hoststatus objects
 *   -s  state code to look for (hosts: equal to it; services: equal or higher)
 *   -n  service description, required when -t is "service"
 *   -w  warning threshold in percent (one trailing % is stripped)
 *   -c  critical threshold in percent (one trailing % is stripped)
 *
 * Exit codes: 0 OK, 1 WARNING, 2 CRITICAL, 3 bad usage or unreadable file.
 */
define("PASSIVE_MODE_STR", "AMBARIPASSIVE=");

  $options = getopt ("f:s:n:w:c:t:");
  foreach (array('t', 'f', 'w', 'c', 's') as $required_option) {
    if (!array_key_exists($required_option, $options)) {
      usage();
      exit(3);
    }
  }
  $status_file = $options['f'];
  $status_code = $options['s'];
  $type = $options['t'];
  $warn = preg_replace('/%$/', '', $options['w']);
  $crit = preg_replace('/%$/', '', $options['c']);

  if ($type == "service" && !array_key_exists('n', $options)) {
    echo "Service description not provided -n option\n";
    exit(3);
  }

  /* An unreadable status file used to be parsed as an empty string, which
   * yields "0 of 0 affected" and an OK result. Report it instead. */
  $status_file_content = is_readable($status_file) ? file_get_contents($status_file) : false;
  if ($status_file_content === false) {
    echo "UNKNOWN: Unable to read status file " . $status_file . "\n";
    exit(3);
  }

  if ($type == "service") {
    $counts = query_alert_count($status_file_content, $options['n'], $status_code);
  } else {
    $counts = query_host_count($status_file_content, $status_code);
  }

  /* No matching objects at all counts as 0% affected. */
  if ($counts['total'] == 0) {
    $percent = 0;
  } else {
    $percent = ($counts['actual'] / $counts['total']) * 100;
  }

  $summary = "total:<" . $counts['total'] . ">, affected:<" . $counts['actual'] . ">\n";
  if ($percent >= $crit) {
    echo "CRITICAL: " . $summary;
    exit (2);
  }
  if ($percent >= $warn) {
    echo "WARNING: " . $summary;
    exit (1);
  }
  echo "OK: " . $summary;
  exit(0);


  # Functions
  /* Print the usage line. PHP does not expand the shell-style "$0", so the
   * script name is taken from argv instead of printing a literal "$0". */
  function usage () {
    $script = isset($_SERVER['argv'][0]) ? $_SERVER['argv'][0] : 'check_aggregate.php';
    echo "Usage: " . $script . " -f <status_file_path> -t type(host/service) -s <status_codes> -n <service description> -w <warn%> -c <crit%>\n";
  }

  /* Count hoststatus objects.
   * Returns array('total' => number of hoststatus objects,
   *               'actual' => how many have current_state == $status_code). */
  function query_host_count ($status_file_content, $status_code) {
    preg_match_all('/hoststatus \{([\S\s]*?)\}/', $status_file_content, $matches, PREG_PATTERN_ORDER);
    $total_hosts = 0;
    $hosts = 0;
    foreach ($matches[0] as $object) {
      $total_hosts++;
      if (getParameter($object, "current_state") == $status_code) {
        $hosts++;
      }
    }
    return array('total' => $total_hosts, 'actual' => $hosts);
  }

  /* Count servicestatus objects whose service_description equals
   * $service_name. Objects whose long_plugin_output starts with
   * PASSIVE_MODE_STR are left out of both numbers.
   * Returns array('total' => matching objects,
   *               'actual' => how many have current_state >= $status_code). */
  function query_alert_count ($status_file_content, $service_name, $status_code) {
    preg_match_all('/servicestatus \{([\S\s]*?)\}/', $status_file_content, $matches, PREG_PATTERN_ORDER);
    $total_alerts = 0;
    $alerts = 0;
    foreach ($matches[0] as $object) {
      $long_out = getParameter($object, "long_plugin_output");
      $is_passive = (strncmp($long_out, PASSIVE_MODE_STR, strlen(PASSIVE_MODE_STR)) == 0);
      if ($is_passive || getParameter($object, "service_description") != $service_name) {
        continue;
      }
      $total_alerts++;
      if (getParameter($object, "current_state") >= $status_code) {
        $alerts++;
      }
    }
    return array('total' => $total_alerts, 'actual' => $alerts);
  }

  /* Map the prefix before "::" in a service description to a service type.
   * Unrecognised prefixes map to "UNKNOWN". Not called by the script above. */
  function get_service_type($service_description)
  {
    $pieces = explode("::", $service_description);
    switch ($pieces[0]) {
      case "NAMENODE":
        return "HDFS";
      case "JOBTRACKER":
        return "MAPREDUCE";
      case "HBASEMASTER":
        return "HBASE";
      case "SYSTEM":
      case "HDFS":
      case "MAPREDUCE":
      case "HBASE":
      case "STORM":
        return $pieces[0];
      default:
        return "UNKNOWN";
    }
  }

  /* Return the value written after $key inside one status object, or "" when
   * the key is absent. The key is quoted so it is always matched literally. */
  function getParameter($object, $key)
  {
    $pattern = '/\s' . preg_quote($key, '/') . '[\s= ]*([\S, ]*)\n/';
    if (preg_match($pattern, $object, $matches)) {
      return $matches[1];
    }
    return "";
  }

/* Pretty-print a JSON string: a line break after every ',', '{' and '[' and
 * before every '}' and ']' that is outside a quoted string, with one
 * $indentStr per nesting level. Not called by the script above. */
function indent($json) {

    $result      = '';
    $pos         = 0;
    $strLen      = strlen($json);
    $indentStr   = ' ';
    $newLine     = "\n";
    $prevChar    = '';
    $outOfQuotes = true;

    /* Stop at the last character; the old "<=" bound read one position past
     * the end of the string. */
    for ($i = 0; $i < $strLen; $i++) {

        // Grab the next character in the string.
        $char = substr($json, $i, 1);

        // An unescaped double quote toggles the in-string state.
        if ($char == '"' && $prevChar != '\\') {
            $outOfQuotes = !$outOfQuotes;

        // If this character is the end of an element,
        // output a new line and indent the next line.
        } else if (($char == '}' || $char == ']') && $outOfQuotes) {
            $result .= $newLine;
            $pos--;
            for ($j = 0; $j < $pos; $j++) {
                $result .= $indentStr;
            }
        }

        // Add the character to the result string.
        $result .= $char;

        // If the last character was the beginning of an element,
        // output a new line and indent the next line.
        if (($char == ',' || $char == '{' || $char == '[') && $outOfQuotes) {
            $result .= $newLine;
            if ($char == '{' || $char == '[') {
                $pos++;
            }

            for ($j = 0; $j < $pos; $j++) {
                $result .= $indentStr;
            }
        }

        $prevChar = $char;
    }

    return $result;
}

/* JSON document format */
/* NOTE(review): nothing in this script emits this document; presumably the
 * comment was carried over from a related status script - confirm. */
/*
{
  "programstatus":{
    "last_command_check":"1327385743"
  },
  "hostcounts":{
    "up_nodes":"",
    "down_nodes":""
  },
  "hoststatus":[
    {
      "host_name"="ip-10-242-191-48.ec2.internal",
      "current_state":"0",
      "last_hard_state":"0",
      "plugin_output":"PING OK - Packet loss = 0%, RTA = 0.04 ms",
      "last_check":"1327385564",
      "current_attempt":"1",
      "last_hard_state_change":"1327362079",
      "last_time_up":"1327385574",
      "last_time_down":"0",
      "last_time_unreachable":"0",
      "is_flapping":"0",
      "last_check":"1327385574",
      "servicestatus":[
      ]
    }
  ],
  "servicestatus":[
    {
      "service_type":"HDFS", {HBASE, MAPREDUCE, HIVE, ZOOKEEPER}
      "service_description":"HDFS Current Load",
      "host_name"="ip-10-242-191-48.ec2.internal",
      "current_attempt":"1",
      "current_state":"0",
      "plugin_output":"PING OK - Packet loss = 0%, RTA = 0.04 ms",
      "last_hard_state_change":"1327362079",
      "last_time_ok":"1327385479",
      "last_time_warning":"0",
      "last_time_unknown":"0",
      "last_time_critical":"0",
      "last_check":"1327385574",
      "is_flapping":"0"
    }
  ]
}
*/

?>
http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_ambari_alerts.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_ambari_alerts.py b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_ambari_alerts.py new file mode 100644 index 0000000..833a798 --- /dev/null +++
b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_ambari_alerts.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# + +import os +import optparse +import json +import traceback + +def main(): + + parser = optparse.OptionParser() + + parser.add_option("-H", "--host", dest="host", default="localhost", help="NameNode host") + parser.add_option("-n", "--name", dest="alert_name", help="Alert name to check") + parser.add_option("-f", "--file", dest="alert_file", help="File containing the alert structure") + + (options, args) = parser.parse_args() + + if options.alert_name is None: + print "Alert name is required (--name or -n)" + exit(-1) + + if options.alert_file is None: + print "Alert file is required (--file or -f)" + exit(-1) + + if not os.path.exists(options.alert_file): + print "Status is unreported" + exit(3) + + try: + with open(options.alert_file, 'r') as f: + data = json.load(f) + + buf_list = [] + exit_code = 0 + + for_hosts = data[options.alert_name] + if for_hosts.has_key(options.host): + for host_entry in for_hosts[options.host]: + buf_list.append(host_entry['text']) + alert_state = host_entry['state'] + if alert_state == 'CRITICAL' 
and exit_code < 2: + exit_code = 2 + elif alert_state == 'WARNING' and exit_code < 1: + exit_code = 1 + + if 0 == len(buf_list): + print "Status is not reported" + exit(3) + else: + print ", ".join(buf_list) + exit(exit_code) + + except Exception: + traceback.print_exc() + exit(3) + +if __name__ == "__main__": + main() + http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_checkpoint_time.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_checkpoint_time.py b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_checkpoint_time.py new file mode 100644 index 0000000..04e8d60 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_checkpoint_time.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# + +import os +import optparse +import time +import urllib2 +import json + +CRIT_MESSAGE = "CRITICAL: Last checkpoint time is below acceptable. Checkpoint was done {h}h. {m}m. 
ago" +WARNING_MESSAGE = "WARNING: Last checkpoint time is below acceptable. Checkpoint was done {h}h. {m}m. ago" +OK_MESSAGE = "OK: Last checkpoint time" +WARNING_JMX_MESSAGE = "WARNING: NameNode JMX not accessible" + +def main(): + current_time = int(round(time.time() * 1000)) + + parser = optparse.OptionParser() + + parser.add_option("-H", "--host", dest="host", + default="localhost", help="NameNode host") + parser.add_option("-p", "--port", dest="port", + default="50070", help="NameNode jmx port") + parser.add_option("-s", "--ssl-enabled", dest="is_ssl_enabled", + default=False, help="SSL Enabled") + parser.add_option("-w", "--warning", dest="warning", + default="200", help="Percent for warning alert") + parser.add_option("-c", "--critical", dest="crit", + default="200", help="Percent for critical alert") + parser.add_option("-t", "--period", dest="period", + default="21600", help="Period time") + parser.add_option("-x", "--txns", dest="txns", + default="1000000", + help="CheckpointNode will create a checkpoint of the namespace every 'dfs.namenode.checkpoint.txns'") + + (options, args) = parser.parse_args() + + scheme = "http" + if options.is_ssl_enabled == "true": + scheme = "https" + + host = get_available_nn_host(options,scheme) + + last_checkpoint_time_qry = "{scheme}://{host}:{port}/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem".format( + scheme=scheme, host=host, port=options.port) + + print last_checkpoint_time_qry + + last_checkpoint_time = int(get_value_from_jmx(last_checkpoint_time_qry,"LastCheckpointTime")) + + journal_transaction_info_qry = "{scheme}://{host}:{port}/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo".format( + scheme=scheme, host=host, port=options.port) + + journal_transaction_info = get_value_from_jmx(journal_transaction_info_qry,"JournalTransactionInfo") + journal_transaction_info_dict = json.loads(journal_transaction_info) + + last_txid = int(journal_transaction_info_dict['LastAppliedOrWrittenTxId']) + most_txid = 
int(journal_transaction_info_dict['MostRecentCheckpointTxId']) + + delta = (current_time - last_checkpoint_time)/1000 + + if ((last_txid - most_txid) > int(options.txns)) and (float(delta) / int(options.period)*100 >= int(options.crit)): + print CRIT_MESSAGE.format(h=get_time(delta)['h'], m=get_time(delta)['m']) + exit(2) + elif ((last_txid - most_txid) > int(options.txns)) and (float(delta) / int(options.period)*100 >= int(options.warning)): + print WARNING_MESSAGE.format(h=get_time(delta)['h'], m=get_time(delta)['m']) + exit(1) + else: + print OK_MESSAGE + exit(0) + + +def get_time(delta): + h = int(delta/3600) + m = int((delta % 3600)/60) + return {'h':h, 'm':m} + + +def get_value_from_jmx(qry, property): + try: + response = urllib2.urlopen(qry) + data=response.read() + except Exception: + print WARNING_JMX_MESSAGE + exit(1) + + data_dict = json.loads(data) + return data_dict["beans"][0][property] + + +def get_available_nn_host(options, scheme): + nn_hosts = options.host.split(" ") + for nn_host in nn_hosts: + try: + urllib2.urlopen("{scheme}://{host}:{port}/jmx".format(scheme=scheme, host=nn_host, port=options.port)) + return nn_host + except Exception: + pass + print WARNING_JMX_MESSAGE + exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_cpu.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_cpu.php b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_cpu.php new file mode 100644 index 0000000..0744e38 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_cpu.php @@ -0,0 +1,109 @@ +<?php +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * CPU load check.
 *
 * For every host in the comma-separated -h list, fetches the
 * java.lang:type=OperatingSystem bean from http(s)://host:port/jmx and reads
 * SystemCpuLoad and AvailableProcessors. The first host without usable data
 * ends the check (CRITICAL for no bean, WARNING for no load value); otherwise
 * the values of the last host are compared with -w / -c.
 *
 * Exit codes: 0 OK, 1 WARNING, 2 CRITICAL, 3 bad usage.
 */

  include "hdp_nagios_init.php";

  /* "e:" so that -e carries a value. With the former bare "e" the option was
   * a flag whose value is boolean false, so https could never be selected. */
  $options = getopt ("h:p:w:c:k:r:t:u:e:");
  foreach (array('h', 'p', 'w', 'c') as $required_option) {
    if (!array_key_exists($required_option, $options)) {
      usage();
      exit(3);
    }
  }

  $hosts = $options['h'];
  $port = $options['p'];
  $warn = preg_replace('/%$/', '', $options['w']);
  $crit = preg_replace('/%$/', '', $options['c']);
  /* Optional settings stay null when absent instead of raising
   * undefined-index notices. */
  $keytab_path = isset($options['k']) ? $options['k'] : null;
  $principal_name = isset($options['r']) ? $options['r'] : null;
  $kinit_path_local = isset($options['t']) ? $options['t'] : null;
  $security_enabled = isset($options['u']) ? $options['u'] : null;
  $ssl_enabled = isset($options['e']) ? $options['e'] : null;

  /* Kinit if security enabled */
  $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name);
  $retcode = $status[0];
  $output = $status[1];

  /* NOTE(review): kinit_if_needed() lives in hdp_nagios_init.php, which is not
   * visible here; element 0 is assumed to be the return code and element 1
   * the command output, as the variable names say - confirm. The test used to
   * compare the output text with 0. */
  if ($retcode != 0) {
    echo "CRITICAL: Error doing kinit for nagios. $output";
    exit (2);
  }

  $protocol = ($ssl_enabled == "true" ? "https" : "http");

  /* The user name does not change between hosts, so look it up once. */
  $username = rtrim(`id -un`, "\n");

  foreach (preg_split('/,/', $hosts) as $host) {
    /* Get the json document */
    $ch = curl_init();
    /* NOTE(review): peer certificate verification is switched off for the
     * https case; left as it was. */
    curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=java.lang:type=OperatingSystem",
                                  CURLOPT_RETURNTRANSFER => true,
                                  CURLOPT_HTTPAUTH => CURLAUTH_ANY,
                                  CURLOPT_USERPWD => "$username:",
                                  CURLOPT_SSL_VERIFYPEER => FALSE ));
    $json_string = curl_exec($ch);
    $info = curl_getinfo($ch);
    /* On 401, call logout() from the include and retry once. */
    if (intval($info['http_code']) == 401){
      logout();
      $json_string = curl_exec($ch);
    }
    $info = curl_getinfo($ch);
    curl_close($ch);
    $json_array = json_decode($json_string, true);

    /* A failed request or undecodable reply leaves no bean; treat that as an
     * empty array so count() is never handed null. */
    $object = (is_array($json_array) && isset($json_array['beans'][0]) && is_array($json_array['beans'][0]))
        ? $json_array['beans'][0]
        : array();

    if (count($object) == 0) {
      echo "CRITICAL: Data inaccessible, Status code = ". $info['http_code'] ."\n";
      exit(2);
    }

    if (!isset($object['SystemCpuLoad']) || $object['SystemCpuLoad'] < 0.0) {
      echo "WARNING: Data unavailable, SystemCpuLoad is not set\n";
      exit(1);
    }

    $cpu_load = $object['SystemCpuLoad'];
    $cpu_count = isset($object['AvailableProcessors']) ? $object['AvailableProcessors'] : '';

    /* The load is scaled by 100 to compare against the percent thresholds. */
    $cpu_percent = $cpu_load*100;
  }

  $out_msg = $cpu_count . " CPU, load " . number_format($cpu_percent, 1, '.', '') . '%';

  if ($cpu_percent > $crit) {
    echo $out_msg . ' > ' . $crit . "% : CRITICAL\n";
    exit(2);
  }
  if ($cpu_percent > $warn) {
    echo $out_msg . ' > ' . $warn . "% : WARNING\n";
    exit(1);
  }

  echo $out_msg . ' < ' . $warn . "% : OK\n";
  exit(0);

  /* Print the usage line. PHP does not expand the shell-style "$0", so the
   * script name is taken from argv instead of printing a literal "$0". */
  function usage () {
    $script = isset($_SERVER['argv'][0]) ? $_SERVER['argv'][0] : 'check_cpu.php';
    echo "Usage: " . $script . " -h <host> -p port -w <warn%> -c <crit%> -k keytab_path -r principal_name -t kinit_path -u security_enabled -e ssl_enabled\n";
  }
?>
http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_cpu.pl ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_cpu.pl b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_cpu.pl new file mode 100644 index 0000000..a5680f7 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_cpu.pl @@ -0,0 +1,114 @@ +#!/usr/bin/perl -w +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
#
#
use strict;
use Net::SNMP;
use Getopt::Long;

# SNMP CPU check.
#
# Walks the table under $base_proc on host -H with community -C, averages
# every value whose OID lies under $proc_load and compares the average with
# the -w / -c levels (any % sign in them is removed).
#
# Exit codes: 0 OK, 1 WARNING, 2 CRITICAL, 3 for bad usage, timeout, SNMP
# errors or no CPU rows.

# Variable
my $base_proc = "1.3.6.1.2.1.25.3.3.1";    # table that is walked
my $proc_load = "1.3.6.1.2.1.25.3.3.1.2";  # column whose values are averaged
my $o_host = undef;
my $o_community = undef;
my $o_warn = undef;
my $o_crit = undef;
my $o_timeout = 15;
my $o_port = 161;

sub Usage {
    print "Usage: $0 -H <host> -C <snmp_community> -w <warn level> -c <crit level>\n";
}

Getopt::Long::Configure ("bundling");
GetOptions(
    'H:s' => \$o_host,
    'C:s' => \$o_community,
    'c:s' => \$o_crit,
    'w:s' => \$o_warn
    );
if (!defined $o_host || !defined $o_community || !defined $o_crit || !defined $o_warn) {
    Usage();
    exit 3;
}
$o_warn =~ s/\%//g;
$o_crit =~ s/\%//g;

# Install the handler before arming the timer, so that an alarm can never
# fire while the default ALRM action is still in place.
$SIG{'ALRM'} = sub {
    print "Unable to contact host: $o_host\n";
    exit 3;
};
alarm ($o_timeout);

# Connect to host
my ($session, $error) = Net::SNMP->session(
    -hostname  => $o_host,
    -community => $o_community,
    -port      => $o_port,
    -timeout   => $o_timeout
    );
if (!defined($session)) {
    printf("Error opening session: %s.\n", $error);
    exit 3;
}

# get_table() is called with a bare OID when Net::SNMP's version is below 4
# and with a named argument otherwise.
my $resultat = (Net::SNMP->VERSION < 4) ?
    $session->get_table($base_proc)
    : $session->get_table(Baseoid => $base_proc);

if (!defined($resultat)) {
    printf("ERROR: Description table : %s.\n", $session->error);
    $session->close;
    exit 3;
}

$session->close;

# Match the load column literally and only as an OID prefix: the dots in the
# OID are quoted rather than acting as regex wildcards, and an optional
# leading dot on the key is accepted.
my ($cpu_used, $ncpu) = (0, 0);
foreach my $key (keys %$resultat) {
    if ($key =~ /^\.?\Q$proc_load\E\./) {
        $cpu_used += $$resultat{$key};
        $ncpu++;
    }
}

if ($ncpu == 0) {
    print "Can't find CPU usage information : UNKNOWN\n";
    exit 3;
}

$cpu_used /= $ncpu;

print "$ncpu CPU, ", $ncpu==1 ? "load" : "average load";
printf(" %.1f%%",$cpu_used);

my $exit_val = 0;
if ($cpu_used > $o_crit) {
    print " > $o_crit% : CRITICAL\n";
    $exit_val = 2;
} elsif ($cpu_used > $o_warn) {
    print " > $o_warn% : WARNING\n";
    $exit_val = 1;
}
# Numeric comparison; the exit value is a number, not a string.
print " < $o_warn% : OK\n" if ($exit_val == 0);
exit $exit_val;
http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_cpu_ha.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_cpu_ha.php b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_cpu_ha.php new file mode 100644 index 0000000..91a7c64 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_cpu_ha.php @@ -0,0 +1,116 @@ +<?php +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
*/

/*
 * CPU load check for a list of hosts where any one may answer.
 *
 * Queries the java.lang:type=OperatingSystem bean on every host of the
 * comma-separated -h list and keeps the bean of the last host that returned
 * one. CRITICAL if no host returned a bean; otherwise SystemCpuLoad of the
 * kept bean is compared with -w / -c.
 *
 * Exit codes: 0 OK, 1 WARNING, 2 CRITICAL, 3 bad usage.
 */

  include "hdp_nagios_init.php";

  /* "e:" so that -e carries a value. With the former bare "e" the option was
   * a flag whose value is boolean false, so https could never be selected. */
  $options = getopt ("h:p:w:c:k:r:t:u:e:");
  foreach (array('h', 'p', 'w', 'c') as $required_option) {
    if (!array_key_exists($required_option, $options)) {
      usage();
      exit(3);
    }
  }

  $hosts = $options['h'];
  $port = $options['p'];
  $warn = preg_replace('/%$/', '', $options['w']);
  $crit = preg_replace('/%$/', '', $options['c']);
  /* Optional settings stay null when absent instead of raising
   * undefined-index notices. */
  $keytab_path = isset($options['k']) ? $options['k'] : null;
  $principal_name = isset($options['r']) ? $options['r'] : null;
  $kinit_path_local = isset($options['t']) ? $options['t'] : null;
  $security_enabled = isset($options['u']) ? $options['u'] : null;
  $ssl_enabled = isset($options['e']) ? $options['e'] : null;

  /* Kinit if security enabled */
  $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name);
  $retcode = $status[0];
  $output = $status[1];

  /* NOTE(review): kinit_if_needed() lives in hdp_nagios_init.php, which is not
   * visible here; element 0 is assumed to be the return code and element 1
   * the command output, as the variable names say - confirm. The test used to
   * compare the output text with 0. */
  if ($retcode != 0) {
    echo "CRITICAL: Error doing kinit for nagios. $output";
    exit (2);
  }

  $protocol = ($ssl_enabled == "true" ? "https" : "http");

  $jmx_response_available = false;
  $jmx_response = array();

  /* The user name does not change between hosts, so look it up once. */
  $username = rtrim(`id -un`, "\n");

  foreach (preg_split('/,/', $hosts) as $host) {
    /* Get the json document */
    $ch = curl_init();
    /* NOTE(review): peer certificate verification is switched off for the
     * https case; left as it was. */
    curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=java.lang:type=OperatingSystem",
                                  CURLOPT_RETURNTRANSFER => true,
                                  CURLOPT_HTTPAUTH => CURLAUTH_ANY,
                                  CURLOPT_USERPWD => "$username:",
                                  CURLOPT_SSL_VERIFYPEER => FALSE ));
    $json_string = curl_exec($ch);
    $info = curl_getinfo($ch);
    /* On 401, call logout() from the include and retry once. */
    if (intval($info['http_code']) == 401){
      logout();
      $json_string = curl_exec($ch);
    }
    $info = curl_getinfo($ch);
    curl_close($ch);
    $json_array = json_decode($json_string, true);

    /* Only a non-empty bean array counts as an answer; a failed request or
     * undecodable reply is skipped without handing null to count(). */
    if (is_array($json_array) && isset($json_array['beans'][0])
        && is_array($json_array['beans'][0]) && count($json_array['beans'][0]) > 0) {
      $jmx_response_available = true;
      $jmx_response = $json_array['beans'][0];
    }
  }

  /* $info still holds the transfer details of the last host tried. */
  if ($jmx_response_available === false) {
    echo "CRITICAL: Data inaccessible, Status code = ". $info['http_code'] ."\n";
    exit(2);
  }

  if (!isset($jmx_response['SystemCpuLoad']) || $jmx_response['SystemCpuLoad'] < 0.0) {
    echo "WARNING: Data unavailable, SystemCpuLoad is not set\n";
    exit(1);
  }

  $cpu_load = $jmx_response['SystemCpuLoad'];
  $cpu_count = isset($jmx_response['AvailableProcessors']) ? $jmx_response['AvailableProcessors'] : '';

  /* The load is scaled by 100 to compare against the percent thresholds. */
  $cpu_percent = $cpu_load*100;

  $out_msg = $cpu_count . " CPU, load " . number_format($cpu_percent, 1, '.', '') . '%';

  if ($cpu_percent > $crit) {
    echo $out_msg . ' > ' . $crit . "% : CRITICAL\n";
    exit(2);
  }
  if ($cpu_percent > $warn) {
    echo $out_msg . ' > ' . $warn . "% : WARNING\n";
    exit(1);
  }

  echo $out_msg . ' < ' . $warn . "% : OK\n";
  exit(0);

  /* Print the usage line. PHP does not expand the shell-style "$0", so the
   * script name is taken from argv instead of printing a literal "$0". */
  function usage () {
    $script = isset($_SERVER['argv'][0]) ? $_SERVER['argv'][0] : 'check_cpu_ha.php';
    echo "Usage: " . $script . " -h <host> -p port -w <warn%> -c <crit%> -k keytab_path -r principal_name -t kinit_path -u security_enabled -e ssl_enabled\n";
  }
?>
http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_datanode_storage.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_datanode_storage.php b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_datanode_storage.php new file mode 100644 index 0000000..dee22b4 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_datanode_storage.php @@ -0,0 +1,100 @@ +<?php +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This plugin makes call to master node, get the jmx-json document + * check the storage capacity remaining on local datanode storage + */ + + include "hdp_nagios_init.php"; + + $options = getopt ("h:p:w:c:e:k:r:t:s:"); + if (!array_key_exists('h', $options) || !array_key_exists('p', $options) || !array_key_exists('w', $options) + || !array_key_exists('c', $options)) { + usage(); + exit(3); + } + + $host=$options['h']; + $port=$options['p']; + $warn=$options['w']; $warn = preg_replace('/%$/', '', $warn); + $crit=$options['c']; $crit = preg_replace('/%$/', '', $crit); + $keytab_path=$options['k']; + $principal_name=$options['r']; + $kinit_path_local=$options['t']; + $security_enabled=$options['s']; + $ssl_enabled=$options['e']; + + /* Kinit if security enabled */ + $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name); + $retcode = $status[0]; + $output = $status[1]; + + if ($output != 0) { + echo "CRITICAL: Error doing kinit for nagios. $output"; + exit (2); + } + + $protocol = ($ssl_enabled == "true" ? 
"https" : "http"); + + /* Get the json document */ + $ch = curl_init(); + $username = rtrim(`id -un`, "\n"); + curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=Hadoop:service=DataNode,name=FSDatasetState-*", + CURLOPT_RETURNTRANSFER => true, + CURLOPT_HTTPAUTH => CURLAUTH_ANY, + CURLOPT_USERPWD => "$username:", + CURLOPT_SSL_VERIFYPEER => FALSE )); + $json_string = curl_exec($ch); + $info = curl_getinfo($ch); + if (intval($info['http_code']) == 401){ + logout(); + $json_string = curl_exec($ch); + } + $info = curl_getinfo($ch); + curl_close($ch); + $json_array = json_decode($json_string, true); + $object = $json_array['beans'][0]; + $cap_remain = $object['Remaining']; /* Total capacity - any extenal files created in data directories by non-hadoop app */ + $cap_total = $object['Capacity']; /* Capacity used by all data partitions minus space reserved for M/R */ + if (count($object) == 0) { + echo "CRITICAL: Data inaccessible, Status code = ". $info['http_code'] ."\n"; + exit(2); + } + $percent_full = ($cap_total - $cap_remain)/$cap_total * 100; + + $out_msg = "Capacity:[" . $cap_total . + "], Remaining Capacity:[" . $cap_remain . + "], percent_full:[" . $percent_full . "]"; + + if ($percent_full > $crit) { + echo "CRITICAL: " . $out_msg . "\n"; + exit (2); + } + if ($percent_full > $warn) { + echo "WARNING: " . $out_msg . "\n"; + exit (1); + } + echo "OK: " . $out_msg . 
"\n"; + exit(0); + + /* print usage */ + function usage () { + echo "Usage: $0 -h <host> -p port -w <warn%> -c <crit%> -k keytab path -r principal name -t kinit path -s security enabled -e ssl enabled\n"; + } +?> http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hdfs_blocks.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hdfs_blocks.php b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hdfs_blocks.php new file mode 100644 index 0000000..3693aa0 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hdfs_blocks.php @@ -0,0 +1,102 @@ +<?php +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* This plugin makes call to master node, get the jmx-json document + * check the corrupt or missing blocks % is > threshod + * check_jmx -H hostaddress -p port -w 1% -c 1% + */ + + include "hdp_nagios_init.php"; + + $options = getopt ("h:p:s:e:k:r:t:u:"); + if (!array_key_exists('h', $options) || !array_key_exists('p', $options) || !array_key_exists('s', $options)) { + usage(); + exit(3); + } + $hosts=$options['h']; + $port=$options['p']; + $nn_jmx_property=$options['s']; + $keytab_path=$options['k']; + $principal_name=$options['r']; + $kinit_path_local=$options['t']; + $security_enabled=$options['u']; + $ssl_enabled=$options['e']; + + /* Kinit if security enabled */ + $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name); + $retcode = $status[0]; + $output = $status[1]; + + if ($output != 0) { + echo "CRITICAL: Error doing kinit for nagios. $output"; + exit (2); + } + + $protocol = ($ssl_enabled == "true" ? "https" : "http"); + + + foreach (preg_split('/,/', $hosts) as $host) { + /* Get the json document */ + + $ch = curl_init(); + $username = rtrim(`id -un`, "\n"); + curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=Hadoop:service=NameNode,name=".$nn_jmx_property, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_HTTPAUTH => CURLAUTH_ANY, + CURLOPT_USERPWD => "$username:", + CURLOPT_SSL_VERIFYPEER => FALSE )); + $json_string = curl_exec($ch); + $info = curl_getinfo($ch); + if (intval($info['http_code']) == 401){ + logout(); + $json_string = curl_exec($ch); + } + $info = curl_getinfo($ch); + curl_close($ch); + $json_array = json_decode($json_string, true); + $m_percent = 0; + $object = $json_array['beans'][0]; + $missing_blocks = $object['MissingBlocks']; + $total_blocks = $object['BlocksTotal']; + if (count($object) == 0) { + echo "CRITICAL: Data inaccessible, Status code = ". 
$info['http_code'] ."\n"; + exit(2); + } + if($total_blocks == 0) { + $m_percent = 0; + } else { + $m_percent = ($missing_blocks/$total_blocks)*100; + break; + } + } + $out_msg = "missing_blocks:<" . $missing_blocks . + ">, total_blocks:<" . $total_blocks . ">"; + + if ($m_percent > 0) { + echo "CRITICAL: " . $out_msg . "\n"; + exit (2); + } + echo "OK: " . $out_msg . "\n"; + exit(0); + + /* print usage */ + function usage () { + echo "Usage: $0 -h <host> -p port -s <namenode bean name> -k keytab path -r principal name -t kinit path -u security enabled -e ssl enabled\n"; + } +?> http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hdfs_capacity.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hdfs_capacity.php b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hdfs_capacity.php new file mode 100644 index 0000000..af72723 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hdfs_capacity.php @@ -0,0 +1,109 @@ +<?php +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This plugin makes call to master node, get the jmx-json document + * check the % HDFS capacity used >= warn and critical limits. + * check_jmx -H hostaddress -p port -w 1 -c 1 + */ + + include "hdp_nagios_init.php"; + + $options = getopt ("h:p:w:c:e:k:r:t:s:"); + if (!array_key_exists('h', $options) || !array_key_exists('p', $options) || !array_key_exists('w', $options) + || !array_key_exists('c', $options)) { + usage(); + exit(3); + } + + $hosts=$options['h']; + $port=$options['p']; + $warn=$options['w']; $warn = preg_replace('/%$/', '', $warn); + $crit=$options['c']; $crit = preg_replace('/%$/', '', $crit); + $keytab_path=$options['k']; + $principal_name=$options['r']; + $kinit_path_local=$options['t']; + $security_enabled=$options['s']; + $ssl_enabled=$options['e']; + + /* Kinit if security enabled */ + $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name); + $retcode = $status[0]; + $output = $status[1]; + + if ($output != 0) { + echo "CRITICAL: Error doing kinit for nagios. $output"; + exit (2); + } + + $protocol = ($ssl_enabled == "true" ? 
"https" : "http"); + + + foreach (preg_split('/,/', $hosts) as $host) { + /* Get the json document */ + $ch = curl_init(); + $username = rtrim(`id -un`, "\n"); + curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=Hadoop:service=NameNode,name=FSNamesystemState", + CURLOPT_RETURNTRANSFER => true, + CURLOPT_HTTPAUTH => CURLAUTH_ANY, + CURLOPT_USERPWD => "$username:", + CURLOPT_SSL_VERIFYPEER => FALSE )); + $json_string = curl_exec($ch); + $info = curl_getinfo($ch); + if (intval($info['http_code']) == 401){ + logout(); + $json_string = curl_exec($ch); + } + $info = curl_getinfo($ch); + curl_close($ch); + $json_array = json_decode($json_string, true); + $percent = 0; + $object = $json_array['beans'][0]; + $CapacityUsed = $object['CapacityUsed']; + $CapacityRemaining = $object['CapacityRemaining']; + if (count($object) == 0) { + echo "CRITICAL: Data inaccessible, Status code = ". $info['http_code'] ."\n"; + exit(2); + } + $CapacityTotal = $CapacityUsed + $CapacityRemaining; + if($CapacityTotal == 0) { + $percent = 0; + } else { + $percent = ($CapacityUsed/$CapacityTotal)*100; + break; + } + } + $out_msg = "DFSUsedGB:<" . round ($CapacityUsed/(1024*1024*1024),1) . + ">, DFSTotalGB:<" . round($CapacityTotal/(1024*1024*1024),1) . ">"; + + if ($percent >= $crit) { + echo "CRITICAL: " . $out_msg . "\n"; + exit (2); + } + if ($percent >= $warn) { + echo "WARNING: " . $out_msg . "\n"; + exit (1); + } + echo "OK: " . $out_msg . 
"\n"; + exit(0); + + /* print usage */ + function usage () { + echo "Usage: $0 -h <host> -p port -w <warn%> -c <crit%> -k keytab path -r principal name -t kinit path -s security enabled -e ssl enabled\n"; + } +?> http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hive_metastore_status.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hive_metastore_status.sh b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hive_metastore_status.sh new file mode 100644 index 0000000..640c077 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hive_metastore_status.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# +#The uri is of the form thrift://<hostname>:<port> +HOST=$1 +PORT=$2 +JAVA_HOME=$3 +SEC_ENABLED=$4 +if [[ "$SEC_ENABLED" == "true" ]]; then + NAGIOS_KEYTAB=$5 + NAGIOS_USER=$6 + KINIT_PATH=$7 + out1=`${KINIT_PATH} -kt ${NAGIOS_KEYTAB} ${NAGIOS_USER} 2>&1` + if [[ "$?" 
-ne 0 ]]; then + echo "CRITICAL: Error doing kinit for nagios [$out1]"; + exit 2; + fi +fi +HCAT_URL=-Dhive.metastore.uris="thrift://$HOST:$PORT" +export JAVA_HOME=$JAVA_HOME +out=`hcat $HCAT_URL -e "show databases" 2>&1` +if [[ "$?" -ne 0 ]]; then + echo "CRITICAL: Error accessing Hive Metastore status [$out]"; + exit 2; +fi +echo "OK: Hive Metastore status OK"; +exit 0; http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hive_thrift_port.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hive_thrift_port.py b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hive_thrift_port.py new file mode 100644 index 0000000..c9414f7 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hive_thrift_port.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# + +import os +import optparse +import json +import traceback +from resource_management import * +from time import time + + +OK_MESSAGE = "TCP OK - %.3f second response time on port %s" +CRITICAL_MESSAGE = "Connection to %s on port %s failed" + +def main(): + + parser = optparse.OptionParser() + + parser.add_option("-H", "--host", dest="address", help="Hive thrift host") + parser.add_option("-p", "--port", type="int", dest="port", help="Hive thrift port") + parser.add_option("--security-enabled", action="store_true", dest="security_enabled") + + (options, args) = parser.parse_args() + + if options.address is None: + print "Specify hive thrift host (--host or -H)" + exit(-1) + + if options.port is None: + print "Specify hive thrift port (--port or -p)" + exit(-1) + + if options.security_enabled: + security_enabled = options.security_enabled + else: + security_enabled = False + + address = options.address + port = options.port + + starttime = time() + if check_thrift_port_sasl(address, port, security_enabled=security_enabled): + timetaken = time() - starttime + print OK_MESSAGE % (timetaken, port) + exit(0) + else: + print CRITICAL_MESSAGE % (address, port) + exit(2) + + +if __name__ == "__main__": + main() + http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hue_status.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hue_status.sh b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hue_status.sh new file mode 100644 index 0000000..076d9b3 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_hue_status.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# + +status=`/etc/init.d/hue status 2>&1` + +if [[ "$?" -ne 0 ]]; then + echo "WARNING: Hue is stopped"; + exit 1; +fi + +echo "OK: Hue is running"; +exit 0; http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_mapred_local_dir_used.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_mapred_local_dir_used.sh b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_mapred_local_dir_used.sh new file mode 100644 index 0000000..3f9243a --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_mapred_local_dir_used.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# +MAPRED_LOCAL_DIRS=$1 +CRITICAL=`echo $2 | cut -d % -f 1` +IFS="," +for mapred_dir in $MAPRED_LOCAL_DIRS +do + percent=`df -hl $mapred_dir | awk '{percent=$5;} END{print percent}' | cut -d % -f 1` + if [ $percent -ge $CRITICAL ]; then + echo "CRITICAL: MapReduce local dir is full." + exit 2 + fi +done +echo "OK: MapReduce local dir space is available." +exit 0 http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_name_dir_status.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_name_dir_status.php b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_name_dir_status.php new file mode 100644 index 0000000..186166d --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_name_dir_status.php @@ -0,0 +1,93 @@ +<?php +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This plugin makes call to namenode, get the jmx-json document + * check the NameDirStatuses to find any offline (failed) directories + * check_jmx -H hostaddress -p port -k keytab path -r principal name -t kinit path -s security enabled + */ + + include "hdp_nagios_init.php"; + + $options = getopt("h:p:e:k:r:t:s:"); + //Check only for mandatory options + if (!array_key_exists('h', $options) || !array_key_exists('p', $options)) { + usage(); + exit(3); + } + + $host=$options['h']; + $port=$options['p']; + $keytab_path=$options['k']; + $principal_name=$options['r']; + $kinit_path_local=$options['t']; + $security_enabled=$options['s']; + $ssl_enabled=$options['e']; + + /* Kinit if security enabled */ + $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name); + $retcode = $status[0]; + $output = $status[1]; + + if ($output != 0) { + echo "CRITICAL: Error doing kinit for nagios. $output"; + exit (2); + } + + $protocol = ($ssl_enabled == "true" ? 
"https" : "http"); + + /* Get the json document */ + $ch = curl_init(); + $username = rtrim(`id -un`, "\n"); + curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo", + CURLOPT_RETURNTRANSFER => true, + CURLOPT_HTTPAUTH => CURLAUTH_ANY, + CURLOPT_USERPWD => "$username:", + CURLOPT_SSL_VERIFYPEER => FALSE )); + $json_string = curl_exec($ch); + $info = curl_getinfo($ch); + if (intval($info['http_code']) == 401){ + logout(); + $json_string = curl_exec($ch); + } + $info = curl_getinfo($ch); + curl_close($ch); + $json_array = json_decode($json_string, true); + $object = $json_array['beans'][0]; + if ($object['NameDirStatuses'] == "") { + echo "WARNING: NameNode directory status not available via ".$protocol."://".$host.":".$port."/jmx url, code " . $info['http_code'] ."\n"; + exit(1); + } + $NameDirStatuses = json_decode($object['NameDirStatuses'], true); + $failed_dir_count = count($NameDirStatuses['failed']); + $out_msg = "CRITICAL: Offline NameNode directories: "; + if ($failed_dir_count > 0) { + foreach ($NameDirStatuses['failed'] as $key => $value) { + $out_msg = $out_msg . $key . ":" . $value . ", "; + } + echo $out_msg . "\n"; + exit (2); + } + echo "OK: All NameNode directories are active" . 
"\n"; + exit(0); + + /* print usage */ + function usage () { + echo "Usage: $0 -h <host> -p port -k keytab path -r principal name -t kinit path -s security enabled -e ssl enabled"; + } +?> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_namenodes_ha.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_namenodes_ha.sh b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_namenodes_ha.sh new file mode 100644 index 0000000..83c1aca --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_namenodes_ha.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# + +IFS=',' read -a namenodes <<< "$1" +port=$2 +totalNN=${#namenodes[@]} +activeNN=() +standbyNN=() +unavailableNN=() + +for nn in "${namenodes[@]}" +do + export no_proxy=$nn + status=$(curl -m 5 -s http://$nn:$port/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem | grep -i "tag.HAState" | grep -o -E "standby|active") + if [ "$status" == "active" ]; then + activeNN[${#activeNN[*]}]="$nn" + elif [ "$status" == "standby" ]; then + standbyNN[${#standbyNN[*]}]="$nn" + elif [ "$status" == "" ]; then + unavailableNN[${#unavailableNN[*]}]="$nn" + fi +done + +message="" +critical=false + +if [ ${#activeNN[@]} -gt 1 ]; then + critical=true + message=$message" Only one NN can have HAState=active;" +elif [ ${#activeNN[@]} == 0 ]; then + critical=true + message=$message" No Active NN available;" +elif [ ${#standbyNN[@]} == 0 ]; then + critical=true + message=$message" No Standby NN available;" +fi + +NNstats=" Active<" +for nn in "${activeNN[@]}" +do + NNstats="$NNstats$nn;" +done +NNstats=${NNstats%\;} +NNstats=$NNstats">, Standby<" +for nn in "${standbyNN[@]}" +do + NNstats="$NNstats$nn;" +done +NNstats=${NNstats%\;} +NNstats=$NNstats">, Unavailable<" +for nn in "${unavailableNN[@]}" +do + NNstats="$NNstats$nn;" +done +NNstats=${NNstats%\;} +NNstats=$NNstats">" + +if [ $critical == false ]; then + echo "OK: NameNode HA healthy;"$NNstats + exit 0 +fi + +echo "CRITICAL:"$message$NNstats +exit 2 http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_nodemanager_health.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_nodemanager_health.sh b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_nodemanager_health.sh new file mode 100644 index 0000000..eedcd62 --- /dev/null +++ 
b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_nodemanager_health.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# +HOST=$1 +PORT=$2 +NODEMANAGER_URL="http://$HOST:$PORT/ws/v1/node/info" +SEC_ENABLED=$3 +export PATH="/usr/bin:$PATH" +if [[ "$SEC_ENABLED" == "true" ]]; then + NAGIOS_KEYTAB=$4 + NAGIOS_USER=$5 + KINIT_PATH=$6 + out1=`${KINIT_PATH} -kt ${NAGIOS_KEYTAB} ${NAGIOS_USER} 2>&1` + if [[ "$?" 
-ne 0 ]]; then + echo "CRITICAL: Error doing kinit for nagios [$out1]"; + exit 2; + fi +fi + +export no_proxy=$HOST +RESPONSE=`curl --negotiate -u : -s $NODEMANAGER_URL` +if [[ "$RESPONSE" == *'"nodeHealthy":true'* ]]; then + echo "OK: NodeManager healthy"; + exit 0; +fi +echo "CRITICAL: NodeManager unhealthy"; +exit 2; http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_oozie_status.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_oozie_status.sh b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_oozie_status.sh new file mode 100644 index 0000000..820ee99 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_oozie_status.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# +# OOZIE_URL is of the form http://<hostname>:<port>/oozie +HOST=`echo $1 | tr '[:upper:]' '[:lower:]'` +PORT=$2 +JAVA_HOME=$3 +SEC_ENABLED=$4 +if [[ "$SEC_ENABLED" == "true" ]]; then + NAGIOS_KEYTAB=$5 + NAGIOS_USER=$6 + KINIT_PATH=$7 + out1=`${KINIT_PATH} -kt ${NAGIOS_KEYTAB} ${NAGIOS_USER} 2>&1` + if [[ "$?" -ne 0 ]]; then + echo "CRITICAL: Error doing kinit for nagios [$out1]"; + exit 2; + fi +fi +OOZIE_URL="http://$HOST:$PORT/oozie" +export JAVA_HOME=$JAVA_HOME +out=`oozie admin -oozie ${OOZIE_URL} -status 2>&1` +if [[ "$?" -ne 0 ]]; then + echo "CRITICAL: Error accessing Oozie Server status [$out]"; + exit 2; +fi +echo "OK: Oozie Server status [$out]"; +exit 0; http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_rpcq_latency.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_rpcq_latency.php b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_rpcq_latency.php new file mode 100644 index 0000000..463f69b --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_rpcq_latency.php @@ -0,0 +1,104 @@ +<?php +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This plugin makes a call to the master node and gets the jmx-json document.
+ * It checks the rpc wait time in the queue, RpcQueueTime_avg_time.
+ * check_rpcq_latency -h hostaddress -p port -n ServiceName -w 1 -c 1
+ * Warning and Critical values are in seconds
+ * Service Name = JobTracker, NameNode, JobHistoryServer
+ *
+ * Exit codes: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = missing arguments.
+ */
+
+  include "hdp_nagios_init.php";
+
+  $options = getopt ("h:p:w:c:n:e:k:r:t:s:");
+  if (!array_key_exists('h', $options) || !array_key_exists('p', $options) || !array_key_exists('w', $options)
+      || !array_key_exists('c', $options) || !array_key_exists('n', $options)) {
+    usage();
+    exit(3);
+  }
+
+  $host=$options['h'];
+  $port=$options['p'];
+  $master=$options['n'];
+  $warn=$options['w'];
+  $crit=$options['c'];
+  /* -k -r -t -s -e are not required by the check above: read them as null when
+   * they are absent instead of indexing a missing array key. */
+  $keytab_path=isset($options['k']) ? $options['k'] : null;
+  $principal_name=isset($options['r']) ? $options['r'] : null;
+  $kinit_path_local=isset($options['t']) ? $options['t'] : null;
+  $security_enabled=isset($options['s']) ? $options['s'] : null;
+  $ssl_enabled=isset($options['e']) ? $options['e'] : null;
+
+  /* Kinit if security enabled */
+  $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name);
+  $retcode = $status[0];
+  $output = $status[1];
+
+  /* Decide on the return code; $output is only the text shown to the operator.
+   * Comparing that text with 0 depends on PHP's string/number juggling and
+   * does not reliably detect a failed kinit. */
+  if ($retcode != 0) {
+    echo "CRITICAL: Error doing kinit for nagios. $output";
+    exit (2);
+  }
+
+  $protocol = ($ssl_enabled == "true" ? "https" : "http");
+
+  /* Get the json document */
+  $ch = curl_init();
+  $username = rtrim(`id -un`, "\n");
+  /* NOTE(review): peer certificate verification is switched off below; this
+   * looks deliberate (self-signed cluster certificates?) - confirm. */
+  curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=Hadoop:service=".$master.",name=RpcActivityForPort*",
+                                CURLOPT_RETURNTRANSFER => true,
+                                CURLOPT_HTTPAUTH => CURLAUTH_ANY,
+                                CURLOPT_USERPWD => "$username:",
+                                CURLOPT_SSL_VERIFYPEER => FALSE ));
+  $json_string = curl_exec($ch);
+  $info = curl_getinfo($ch);
+  /* On 401 call logout() (defined outside this file) and retry once */
+  if (intval($info['http_code']) == 401){
+    logout();
+    $json_string = curl_exec($ch);
+  }
+  $info = curl_getinfo($ch);
+  curl_close($ch);
+  $json_array = json_decode($json_string, true);
+  /* json_decode() gives null for a failed or non-JSON response and the bean
+   * list may be empty; both are reported as inaccessible data. */
+  $object = (isset($json_array['beans'][0]) && is_array($json_array['beans'][0]))
+      ? $json_array['beans'][0] : array();
+  if (count($object) == 0) {
+    echo "CRITICAL: Data inaccessible, Status code = ". $info['http_code'] ."\n";
+    exit(2);
+  }
+  $RpcQueueTime_avg_time = round($object['RpcQueueTime_avg_time'], 2);
+  $RpcProcessingTime_avg_time = round($object['RpcProcessingTime_avg_time'], 2);
+
+  $out_msg = "RpcQueueTime_avg_time:<" . $RpcQueueTime_avg_time .
+             "> Secs, RpcProcessingTime_avg_time:<" . $RpcProcessingTime_avg_time .
+             "> Secs";
+
+  /* Only the queue time is compared with the thresholds; the processing time
+   * is reported for information. */
+  if ($RpcQueueTime_avg_time >= $crit) {
+    echo "CRITICAL: " . $out_msg . "\n";
+    exit (2);
+  }
+  if ($RpcQueueTime_avg_time >= $warn) {
+    echo "WARNING: " . $out_msg . "\n";
+    exit (1);
+  }
+  echo "OK: " . $out_msg . "\n";
+  exit(0);
+
+  /* print usage ("$0" is not expanded by PHP, so name the script explicitly) */
+  function usage () {
+    echo "Usage: " . basename(__FILE__) . " -h <host> -p port -n <JobTracker/NameNode/JobHistoryServer> -w <warn_in_sec> -c <crit_in_sec> -k keytab path -r principal name -t kinit path -s security enabled -e ssl enabled\n";
+  }
+?>
http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_rpcq_latency_ha.php
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_rpcq_latency_ha.php b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_rpcq_latency_ha.php
new file mode 100644
index 0000000..3e7616c
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_rpcq_latency_ha.php
@@ -0,0 +1,115 @@
+<?php
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This plugin makes a call to each listed master node and gets the jmx-json
+ * document. It checks the rpc wait time in the queue, RpcQueueTime_avg_time.
+ * check_rpcq_latency_ha -h host1,host2 -p port -n ServiceName -w 1 -c 1
+ * Warning and Critical values are in seconds
+ * Service Name = JobTracker, NameNode, JobHistoryServer
+ *
+ * Exit codes: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = missing arguments.
+ */
+
+  include "hdp_nagios_init.php";
+
+  $options = getopt ("h:p:w:c:n:e:k:r:t:s:");
+  if (!array_key_exists('h', $options) || !array_key_exists('p', $options) || !array_key_exists('w', $options)
+      || !array_key_exists('c', $options) || !array_key_exists('n', $options)) {
+    usage();
+    exit(3);
+  }
+
+  $hosts=$options['h'];
+  $port=$options['p'];
+  $master=$options['n'];
+  $warn=$options['w'];
+  $crit=$options['c'];
+  /* -k -r -t -s -e are not required by the check above: read them as null when
+   * they are absent instead of indexing a missing array key. */
+  $keytab_path=isset($options['k']) ? $options['k'] : null;
+  $principal_name=isset($options['r']) ? $options['r'] : null;
+  $kinit_path_local=isset($options['t']) ? $options['t'] : null;
+  $security_enabled=isset($options['s']) ? $options['s'] : null;
+  $ssl_enabled=isset($options['e']) ? $options['e'] : null;
+
+  /* Kinit if security enabled */
+  $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name);
+  $retcode = $status[0];
+  $output = $status[1];
+
+  /* Decide on the return code; $output is only the text shown to the operator.
+   * Comparing that text with 0 depends on PHP's string/number juggling and
+   * does not reliably detect a failed kinit. */
+  if ($retcode != 0) {
+    echo "CRITICAL: Error doing kinit for nagios. $output";
+    exit (2);
+  }
+
+  $protocol = ($ssl_enabled == "true" ? "https" : "http");
+
+  $jmx_response_available = false;
+  $jmx_response = null;
+
+  /* The user name does not depend on the host, so resolve it once */
+  $username = rtrim(`id -un`, "\n");
+
+  /* Every host is queried; when several answer, the bean of the last one that
+   * answered is the one evaluated below.
+   * NOTE(review): confirm this is intended rather than stopping at the first. */
+  foreach (explode(',', $hosts) as $host) {
+    /* Get the json document */
+    $ch = curl_init();
+    curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=Hadoop:service=".$master.",name=RpcActivityForPort*",
+                                  CURLOPT_RETURNTRANSFER => true,
+                                  CURLOPT_HTTPAUTH => CURLAUTH_ANY,
+                                  CURLOPT_USERPWD => "$username:",
+                                  CURLOPT_SSL_VERIFYPEER => FALSE ));
+    $json_string = curl_exec($ch);
+    $info = curl_getinfo($ch);
+    /* On 401 call logout() (defined outside this file) and retry once */
+    if (intval($info['http_code']) == 401){
+      logout();
+      $json_string = curl_exec($ch);
+    }
+    $info = curl_getinfo($ch);
+    curl_close($ch);
+    $json_array = json_decode($json_string, true);
+
+    /* json_decode() gives null for a failed or non-JSON response and the bean
+     * list may be empty; such a host simply contributes nothing. */
+    if (isset($json_array['beans'][0]) && is_array($json_array['beans'][0])
+        && count($json_array['beans'][0]) > 0) {
+      $jmx_response_available = true;
+      $jmx_response = $json_array['beans'][0];
+    }
+  }
+
+  /* $info still holds the transfer details of the last host tried */
+  if ($jmx_response_available === false) {
+    echo "CRITICAL: Data inaccessible, Status code = ". $info['http_code'] ."\n";
+    exit(2);
+  }
+
+  $RpcQueueTime_avg_time = round($jmx_response['RpcQueueTime_avg_time'], 2);
+  $RpcProcessingTime_avg_time = round($jmx_response['RpcProcessingTime_avg_time'], 2);
+
+  $out_msg = "RpcQueueTime_avg_time:<" . $RpcQueueTime_avg_time .
+             "> Secs, RpcProcessingTime_avg_time:<" . $RpcProcessingTime_avg_time .
+             "> Secs";
+
+  /* Only the queue time is compared with the thresholds; the processing time
+   * is reported for information. */
+  if ($RpcQueueTime_avg_time >= $crit) {
+    echo "CRITICAL: " . $out_msg . "\n";
+    exit (2);
+  }
+  if ($RpcQueueTime_avg_time >= $warn) {
+    echo "WARNING: " . $out_msg . "\n";
+    exit (1);
+  }
+  echo "OK: " . $out_msg . "\n";
+  exit(0);
+
+  /* print usage ("$0" is not expanded by PHP, so name the script explicitly) */
+  function usage () {
+    echo "Usage: " . basename(__FILE__) . " -h <host> -p port -n <JobTracker/NameNode/JobHistoryServer> -w <warn_in_sec> -c <crit_in_sec> -k keytab path -r principal name -t kinit path -s security enabled -e ssl enabled\n";
+  }
+?>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_templeton_status.sh
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_templeton_status.sh b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_templeton_status.sh
new file mode 100644
index 0000000..3e2ba0f
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_templeton_status.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+# Nagios check for the WebHCat (Templeton) status endpoint.
+# Arguments: host port version security_enabled [keytab principal kinit_path]
+# Exit codes: 0 = OK, 2 = CRITICAL.
+# A healthy reply, with the HTTP code appended by curl -w, looks like:
+# out='{"status":"ok","version":"v1"}<status_code:200>'
+HOST=$1
+PORT=$2
+VERSION=$3
+SEC_ENABLED=$4
+if [[ "$SEC_ENABLED" == "true" ]]; then
+  NAGIOS_KEYTAB=$5
+  NAGIOS_USER=$6
+  KINIT_PATH=$7
+  # Quoted so that a keytab or kinit path containing spaces is not word-split
+  if ! out1=$("${KINIT_PATH}" -kt "${NAGIOS_KEYTAB}" "${NAGIOS_USER}" 2>&1); then
+    echo "CRITICAL: Error doing kinit for nagios [$out1]";
+    exit 2;
+  fi
+fi
+regex="^.*\"status\":\"ok\".*<status_code:200>$"
+export no_proxy=$HOST
+out=$(curl --negotiate -u : -s -w '<status_code:%{http_code}>' "http://$HOST:$PORT/templeton/$VERSION/status" 2>&1)
+if [[ $out =~ $regex ]]; then
+  # Braces in the JSON reply are shown as square brackets in the status line
+  # NOTE(review): presumably because braces upset the Nagios display - confirm
+  out=$(echo "$out" | sed -e 's/{/[/g' -e 's/}/]/g')
+  echo "OK: WebHCat Server status [$out]";
+  exit 0;
+fi
+echo "CRITICAL: Error accessing WebHCat Server, status [$out]";
+exit 2;
http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_webui.sh
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_webui.sh b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_webui.sh
new file mode 100644
index 0000000..7044878
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_webui.sh
@@ -0,0 +1,103 @@
+#!/usr/bin/env bash
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+
+# Nagios check: is the web UI of a Hadoop service reachable over HTTP?
+# Arguments: service_name host_name [port]
+# Exit codes: 0 = OK, 1 = WARNING (UI not reachable), 3 = UNKNOWN (bad arguments).
+
+service=$1
+host=$2
+port=$3
+
+# Fetch URL $1 with curl, discarding the body, and print curl's exit status on
+# stdout (0 means the transfer succeeded). -k skips certificate verification,
+# -s keeps the progress meter out of the plugin's output streams.
+checkurl () {
+  url=$1
+  export no_proxy=$host
+  curl -s -k -o /dev/null "$url"
+  echo $?
+}
+
+if [[ -z "$service" || -z "$host" ]]; then
+  echo "UNKNOWN: Invalid arguments; Usage: check_webui.sh service_name host_name [port]";
+  exit 3;
+fi
+
+# Per service: the path appended to http://host:port and the label used in the
+# warning message. The probe itself is identical for every service.
+case "$service" in
+jobtracker)      urlpath="";                    label="Jobtracker web UI" ;;
+namenode)        urlpath="";                    label="NameNode Web UI" ;;
+jobhistory)      urlpath="/jobhistoryhome.jsp"; label="HistoryServer Web UI" ;;
+hbase)           urlpath="/master-status";      label="HBase Master Web UI" ;;
+resourcemanager) urlpath="/cluster";            label="ResourceManager Web UI" ;;
+historyserver2)  urlpath="/jobhistory";         label="HistoryServer Web UI" ;;
+storm_ui)        urlpath="";                    label="Storm Web UI" ;;
+falconserver)    urlpath="/";                   label="FalconServer Web UI" ;;
+*) echo "UNKNOWN: Invalid service name [$service], valid options [jobtracker|jobhistory|hbase|namenode|resourcemanager|historyserver2|falconserver|storm_ui]"
+   exit 3
+   ;;
+esac
+
+weburl="http://$host:$port$urlpath"
+if [[ $(checkurl "$weburl") -ne 0 ]]; then
+  echo "WARNING: $label not accessible : $weburl";
+  exit 1;
+fi
+
+echo "OK: Successfully accessed $service Web UI"
+exit 0;
http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_webui_ha.sh
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_webui_ha.sh b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_webui_ha.sh
new file mode 100644
index 0000000..d9a814d
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/check_webui_ha.sh
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+
+# Nagios check for a service deployed on several hosts: OK as soon as the web
+# UI answers on any one of them.
+# Arguments: service_name host1[,host2,...] [port]
+# Exit codes: 0 = OK, 1 = WARNING (no host reachable), 3 = UNKNOWN (bad arguments).
+
+service=$1
+hosts=$2
+port=$3
+
+# Fetch URL $1 with curl, discarding the body, and print curl's exit status on
+# stdout (0 means the transfer succeeded). $2 is the host to exempt from any
+# proxy. -k skips certificate verification, -s silences the progress meter.
+checkurl () {
+  url=$1
+  host=$2
+  export no_proxy=$host
+  curl -s -k -o /dev/null "$url"
+  echo $?
+}
+
+if [[ -z "$service" || -z "$hosts" ]]; then
+  echo "UNKNOWN: Invalid arguments; Usage: check_webui_ha.sh service_name host1[,host2,...] [port]";
+  exit 3;
+fi
+
+case "$service" in
+resourcemanager)
+  url_end_part="/cluster"
+  ;;
+*) echo "UNKNOWN: Invalid service name [$service], valid options [resourcemanager]"
+   exit 3
+   ;;
+esac
+
+# Split the comma-separated host list; IFS is changed for this read only
+IFS=',' read -r -a hosts_array <<< "${hosts}"
+
+for host in "${hosts_array[@]}"
+do
+  weburl="http://${host}:${port}${url_end_part}"
+  if [[ $(checkurl "$weburl" "$host") -eq 0 ]]; then
+    echo "OK: Successfully accessed $service Web UI"
+    exit 0;
+  fi
+done
+
+# No host answered; $weburl is the last URL that was tried
+echo "WARNING: $service Web UI not accessible : $weburl";
+exit 1;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/hdp_mon_nagios_addons.conf
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/hdp_mon_nagios_addons.conf b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/hdp_mon_nagios_addons.conf
new file mode 100644
index 0000000..87717d2
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/hdp_mon_nagios_addons.conf
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Publishes /usr/share/hdp under the URL path /ambarinagios, readable by all
+# clients, with no optional features and no .htaccess overrides.
+Alias /ambarinagios /usr/share/hdp
+<Directory /usr/share/hdp>
+  Options None
+  AllowOverride None
+  # Apache 2.4 and later: access control is handled by mod_authz_core
+  <IfModule mod_authz_core.c>
+    Require all granted
+  </IfModule>
+  # Apache 2.2: Order/Allow syntax (rejected by 2.4 without mod_access_compat)
+  <IfModule !mod_authz_core.c>
+    Order allow,deny
+    Allow from all
+  </IfModule>
+</Directory>