AMBARI-7534 - Alerts: Disabling an Alert Should Propagate to Agent (jonathanhurley)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/10f1f73b Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/10f1f73b Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/10f1f73b Branch: refs/heads/trunk Commit: 10f1f73b810e2c84d5818314ff9d1743c644a3bf Parents: 7324374 Author: Jonathan Hurley <jhur...@hortonworks.com> Authored: Sat Sep 27 09:33:43 2014 -0400 Committer: Jonathan Hurley <jhur...@hortonworks.com> Committed: Sat Sep 27 09:33:43 2014 -0400 ---------------------------------------------------------------------- .../ambari_agent/AlertSchedulerHandler.py | 27 ++++++-- .../python/ambari_agent/alerts/base_alert.py | 12 +++- .../src/test/python/ambari_agent/TestAlerts.py | 68 +++++++++++++++++++- .../ambari_agent/dummy_files/definitions.json | 1 + 4 files changed, 101 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/10f1f73b/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py b/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py index 8dcce50..c645cba 100644 --- a/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py +++ b/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py @@ -125,7 +125,7 @@ class AlertSchedulerHandler(): uuid_valid = False for definition in definitions: - definition_uuid = definition.definition_uuid() + definition_uuid = definition.get_uuid() if scheduled_job.name == definition_uuid: uuid_valid = True break @@ -141,7 +141,7 @@ class AlertSchedulerHandler(): for definition in definitions: definition_scheduled = False for scheduled_job in scheduled_jobs: - definition_uuid = definition.definition_uuid() + definition_uuid = definition.get_uuid() if definition_uuid == scheduled_job.name: definition_scheduled = True break @@ -264,7 +264,16 @@ class AlertSchedulerHandler(): ''' Schedule a definition (callable). Scheduled jobs are given the UUID as their name so that they can be identified later on. + <p/> + This function can be called with a definition that is disabled; it will + simply NOOP. ''' + # NOOP if the definition is disabled; don't schedule it + if definition.is_enabled() == False: + logger.info("The alert {0} with UUID {1} is disabled and will not be scheduled".format( + definition.get_name(),definition.get_uuid())) + return + job = None if self.__in_minutes: @@ -277,10 +286,20 @@ class AlertSchedulerHandler(): # although the documentation states that Job(kwargs) takes a name # key/value pair, it does not actually set the name; do it manually if job is not None: - job.name = definition.definition_uuid() + job.name = definition.get_uuid() logger.info("Scheduling {0} with UUID {1}".format( - definition.definition_name(), definition.definition_uuid())) + definition.get_name(), definition.get_uuid())) + + def get_job_count(self): + ''' + Gets the number of jobs currently scheduled. This is mainly used for + test verification of scheduling + ''' + if self.__scheduler is None: + return 0 + + return len(self.__scheduler.get_jobs()) def main(): args = list(sys.argv) http://git-wip-us.apache.org/repos/asf/ambari/blob/10f1f73b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py index 10dcff8..a3eb404 100644 --- a/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py +++ b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py @@ -48,14 +48,21 @@ class BaseAlert(object): return 1 if interval < 1 else interval - def definition_name(self): + def is_enabled(self): + ''' + gets whether the definition is enabled + ''' + return self.alert_meta['enabled'] + + + def get_name(self): ''' gets the unique name of the alert definition ''' return self.alert_meta['name'] - def definition_uuid(self): + def get_uuid(self): ''' gets the unique has of the alert definition ''' @@ -101,6 +108,7 @@ class BaseAlert(object): data['component'] = self._find_value('componentName') data['timestamp'] = long(time.time() * 1000) data['uuid'] = self._find_value('uuid') + data['enabled'] = self._find_value('enabled') if logger.isEnabledFor(logging.DEBUG): logger.debug("debug alert text: {0}".format(data['text'])) http://git-wip-us.apache.org/repos/asf/ambari/blob/10f1f73b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py index 662d8ee..f9c2ab4 100644 --- a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py +++ b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py @@ -34,9 +34,11 @@ class TestAlerts(TestCase): def setUp(self): pass + def tearDown(self): sys.stdout == sys.__stdout__ + @patch.object(Scheduler, "add_interval_job") @patch.object(Scheduler, "start") def test_start(self, aps_add_interval_job_mock, aps_start_mock): @@ -49,6 +51,7 @@ class TestAlerts(TestCase): self.assertTrue(aps_add_interval_job_mock.called) self.assertTrue(aps_start_mock.called) + def test_port_alert(self): json = { "name": "namenode_process", "service": "HDFS", @@ -56,6 +59,7 @@ class TestAlerts(TestCase): "label": "NameNode process", "interval": 6, "scope": "host", + "enabled": True, "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1", "source": { "type": "PORT", @@ -80,6 +84,7 @@ class TestAlerts(TestCase): res = pa.collect() + def test_port_alert_no_sub(self): json = { "name": "namenode_process", "service": "HDFS", @@ -87,6 +92,7 @@ class TestAlerts(TestCase): "label": "NameNode process", "interval": 6, "scope": "host", + "enabled": True, "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1", "source": { "type": "PORT", @@ -109,6 +115,7 @@ class TestAlerts(TestCase): res = pa.collect() + def test_script_alert(self): json = { "name": "namenode_process", @@ -117,6 +124,7 @@ class TestAlerts(TestCase): "label": "NameNode process", "interval": 6, "scope": "host", + "enabled": True, "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1", "source": { "type": "SCRIPT", @@ -145,7 +153,8 @@ class TestAlerts(TestCase): self.assertEquals('WARNING', collector.alerts()[0]['state']) self.assertEquals('all is not well', collector.alerts()[0]['text']) - + + @patch.object(MetricAlert, "_load_jmx") def test_metric_alert(self, ma_load_jmx_mock): json = { @@ -155,6 +164,7 @@ class TestAlerts(TestCase): "label": "NameNode process", "interval": 6, "scope": "host", + "enabled": True, "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1", "source": { "type": "METRIC", @@ -201,13 +211,17 @@ class TestAlerts(TestCase): self.assertEquals('OK', collector.alerts()[0]['state']) self.assertEquals('ok_arr: 1 3 None', collector.alerts()[0]['text']) + def test_reschedule(self): test_file_path = os.path.join('ambari_agent', 'dummy_files') test_stack_path = os.path.join('ambari_agent', 'dummy_files') ash = AlertSchedulerHandler(test_file_path, test_stack_path) ash.start() + + self.assertEquals(1, ash.get_job_count()) ash.reschedule() + self.assertEquals(1, ash.get_job_count()) def test_alert_collector_purge(self): @@ -217,6 +231,7 @@ class TestAlerts(TestCase): "label": "NameNode process", "interval": 6, "scope": "host", + "enabled": True, "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1", "source": { "type": "PORT", @@ -247,3 +262,54 @@ class TestAlerts(TestCase): collector.remove_by_uuid('c1f73191-4481-4435-8dae-fd380e4c0be1') self.assertEquals(0,len(collector.alerts())) + + def test_disabled_definitions(self): + test_file_path = os.path.join('ambari_agent', 'dummy_files') + test_stack_path = os.path.join('ambari_agent', 'dummy_files') + + ash = AlertSchedulerHandler(test_file_path, test_stack_path) + ash.start() + + self.assertEquals(1, ash.get_job_count()) + + json = { "name": "namenode_process", + "service": "HDFS", + "component": "NAMENODE", + "label": "NameNode process", + "interval": 6, + "scope": "host", + "enabled": True, + "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1", + "source": { + "type": "PORT", + "uri": "{{hdfs-site/my-key}}", + "default_port": 50070, + "reporting": { + "ok": { + "text": "TCP OK - {0:.4f} response time on port {1}" + }, + "critical": { + "text": "Could not load process info: {0}" + } + } + } + } + + pa = PortAlert(json, json['source']) + ash.schedule_definition(pa) + + self.assertEquals(2, ash.get_job_count()) + + json['enabled'] = False + pa = PortAlert(json, json['source']) + ash.schedule_definition(pa) + + # verify disabled alert not scheduled + self.assertEquals(2, ash.get_job_count()) + + json['enabled'] = True + pa = PortAlert(json, json['source']) + ash.schedule_definition(pa) + + # verify enabled alert was scheduled + self.assertEquals(3, ash.get_job_count()) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/10f1f73b/ambari-agent/src/test/python/ambari_agent/dummy_files/definitions.json ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/dummy_files/definitions.json b/ambari-agent/src/test/python/ambari_agent/dummy_files/definitions.json index 30973c2..8e01833 100644 --- a/ambari-agent/src/test/python/ambari_agent/dummy_files/definitions.json +++ b/ambari-agent/src/test/python/ambari_agent/dummy_files/definitions.json @@ -17,6 +17,7 @@ "interval": 6, "scope": "host", "uuid": "3f82ae27-fa6a-465b-b77d-67963ac55d2f", + "enabled": true, "source": { "type": "PORT", "uri": "{{hdfs-site/dfs.namenode.http-address}}",