Repository: ambari Updated Branches: refs/heads/trunk 46325c563 -> 21e3d2a73
AMBARI-8544 Integrate AMS with Ambari Alerting System (dsen) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/21e3d2a7 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/21e3d2a7 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/21e3d2a7 Branch: refs/heads/trunk Commit: 21e3d2a73ba2048292e42b5e01c1d95d9d5896f8 Parents: 46325c5 Author: Dmytro Sen <d...@apache.org> Authored: Thu Dec 4 19:27:40 2014 +0200 Committer: Dmytro Sen <d...@apache.org> Committed: Thu Dec 4 19:27:40 2014 +0200 ---------------------------------------------------------------------- .../stacks/HDP/2.2/services/AMS/alerts.json | 139 +++++++++++++++++++ .../files/alert_ambari_metrics_monitor.py | 80 +++++++++++ 2 files changed, 219 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/21e3d2a7/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/alerts.json ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/alerts.json b/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/alerts.json new file mode 100644 index 0000000..0a1e469 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/alerts.json @@ -0,0 +1,139 @@ +{ + "AMS": { + "service": [ + { + "name": "ams_metric_monitor_process_percent", + "label": "Percent AMS Metric Monitors Available", + "interval": 1, + "scope": "SERVICE", + "enabled": true, + "source": { + "type": "AGGREGATE", + "alert_name": "ams_metric_monitor_process", + "reporting": { + "ok": { + "text": "affected: [{1}], total: [{0}]" + }, + "warning": { + "text": "affected: [{1}], total: [{0}]", + "value": 0.1 + }, + "critical": { + "text": "affected: [{1}], total: [{0}]", + "value": 0.3 + } + } + } + } + ], + "METRIC_COLLECTOR": [ + { + "name": "ams_metric_collector_process", + "label": 
"AMS Metric Collector Process", + "interval": 1, + "scope": "ANY", + "enabled": true, + "source": { + "type": "PORT", + "uri": "8188", + "default_port": 8188, + "reporting": { + "ok": { + "text": "TCP OK - {0:.4f} response on port {1}" + }, + "critical": { + "text": "Connection failed: {0} to {1}:{2}" + } + } + } + }, + { + "name": "ams_metric_collector_hbase_master_process", + "label": "AMS Metric Collector HBase Master Process", + "interval": 1, + "scope": "ANY", + "source": { + "type": "PORT", + "uri": "{{ams-hbase-site/hbase.master.info.port}}", + "default_port": 61310, + "reporting": { + "ok": { + "text": "TCP OK - {0:.4f} response on port {1}" + }, + "critical": { + "text": "Connection failed: {0} to {1}:{2}" + } + } + } + }, + { + "name": "ams_metric_collector_hbase_master_cpu", + "label": "AMS Metric Collector HBase Maser CPU Utilization", + "interval": 5, + "scope": "ANY", + "enabled": true, + "source": { + "type": "METRIC", + "uri": { + "http": "{{ams-hbase-site/hbase.master.info.port}}", + "https": "{{ams-hbase-site/hbase.master.info.port}}", + "https_property": "{{cluster-env/security_enabled}}", + "https_property_value": "true", + "default_port": 61310 + }, + "reporting": { + "ok": { + "text": "{1} CPU, load {0:.1%}" + }, + "warning": { + "text": "{1} CPU, load {0:.1%}", + "value": 200 + }, + "critical": { + "text": "{1} CPU, load {0:.1%}", + "value": 250 + } + }, + "jmx": { + "property_list": [ + "java.lang:type=OperatingSystem/SystemCpuLoad", + "java.lang:type=OperatingSystem/AvailableProcessors" + ], + "value": "{0} * 100" + } + } + }, + { + "name": "ams_metric_collector_zookeeper_server_process", + "label": "AMS Metric Collector ZooKeeper Server Process", + "interval": 1, + "scope": "ANY", + "source": { + "type": "PORT", + "uri": "{{ams-hbase-site/hbase.zookeeper.property.clientPort}}", + "default_port": 61181, + "reporting": { + "ok": { + "text": "TCP OK - {0:.4f} response on port {1}" + }, + "critical": { + "text": "Connection failed: {0} to 
{1}:{2}" + } + } + } + } + ], + "METRIC_MONITOR": [ + { + "name": "ams_metric_monitor_process", + "label": "AMS Metric Monitor Status", + "interval": 1, + "scope": "ANY", + "source": { + "type": "SCRIPT", + "path": "HDP/2.2/services/AMS/package/files/alert_ambari_metrics_monitor.py" + } + } + ] + } +} http://git-wip-us.apache.org/repos/asf/ambari/blob/21e3d2a7/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/package/files/alert_ambari_metrics_monitor.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/package/files/alert_ambari_metrics_monitor.py b/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/package/files/alert_ambari_metrics_monitor.py new file mode 100644 index 0000000..5841267 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/package/files/alert_ambari_metrics_monitor.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python + +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+"""
+
+import socket
+
+from resource_management.libraries.functions.check_process_status import check_process_status
+from resource_management.core.exceptions import ComponentIsNotRunning
+
+# result codes reported by execute()
+RESULT_CODE_OK = 'OK'
+RESULT_CODE_CRITICAL = 'CRITICAL'
+RESULT_CODE_UNKNOWN = 'UNKNOWN'
+
+# PID file checked by execute()
+AMS_MONITOR_PID_PATH = '/var/run/ambari-metrics-monitor/ambari-metrics-monitor.pid'
+
+
+def get_tokens():
+    """
+    Lists the {{site/property}} tokens whose values are gathered into the
+    dictionary passed to execute(); this script declares none.
+    """
+    return tuple()
+
+
+def is_monitor_process_live(pid_file):
+    """
+    Reports whether the AMS monitor recorded in the given PID file is running.
+    :param pid_file: the PID file of the monitor to check
+    :return: True if the status check passes, False if it raises
+      ComponentIsNotRunning
+    """
+    try:
+        check_process_status(pid_file)
+    except ComponentIsNotRunning:
+        return False
+
+    return True
+
+
+def execute(parameters=None, host_name=None):
+    """
+    Returns a tuple of the result code and a one-element list holding the label
+
+    Keyword arguments:
+    parameters (dictionary): parameter key to value mapping; only checked for None
+    host_name (string): host named in the label; defaults to socket.getfqdn()
+    """
+    if parameters is None:
+        return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
+
+    # no host supplied: use this machine's fully qualified domain name
+    if host_name is None:
+        host_name = socket.getfqdn()
+
+    if is_monitor_process_live(AMS_MONITOR_PID_PATH):
+        state = RESULT_CODE_OK
+        template = 'Ambari Monitor is running on {0}'
+    else:
+        state = RESULT_CODE_CRITICAL
+        template = 'Ambari Monitor is NOT running on {0}'
+
+    return (state, [template.format(host_name)])