AMBARI-20565. Ambari Agent Alert to detect when 'hdp-select versions' reports 
an error (alejandro)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/f18fad36
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/f18fad36
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/f18fad36

Branch: refs/heads/branch-feature-AMBARI-12556
Commit: f18fad36821e148b69138ec2be6b5cd70ba207cc
Parents: e3d9ff6
Author: Alejandro Fernandez <afernan...@hortonworks.com>
Authored: Fri Mar 24 12:24:18 2017 -0700
Committer: Alejandro Fernandez <afernan...@hortonworks.com>
Committed: Tue Mar 28 17:26:15 2017 -0700

----------------------------------------------------------------------
 .../libraries/functions/stack_select.py         |  16 ++-
 .../server/checks/AtlasPresenceCheck.java       |   4 +-
 ambari-server/src/main/resources/alerts.json    |  12 +++
 .../host_scripts/alert_version_select.py        | 104 +++++++++++++++++++
 .../server/api/services/AmbariMetaInfoTest.java |  14 +--
 .../metadata/AgentAlertDefinitionsTest.java     |   2 +-
 6 files changed, 141 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/f18fad36/ambari-common/src/main/python/resource_management/libraries/functions/stack_select.py
----------------------------------------------------------------------
diff --git 
a/ambari-common/src/main/python/resource_management/libraries/functions/stack_select.py
 
b/ambari-common/src/main/python/resource_management/libraries/functions/stack_select.py
index 20b4cbd..79393b9 100644
--- 
a/ambari-common/src/main/python/resource_management/libraries/functions/stack_select.py
+++ 
b/ambari-common/src/main/python/resource_management/libraries/functions/stack_select.py
@@ -290,11 +290,25 @@ def _get_upgrade_stack():
 
   return None
 
+def unsafe_get_stack_versions():
+  """
+  Gets list of stack versions installed on the host.
+  By default a call to <stack-selector-tool> versions is made to get the list of installed stack versions.
+  DO NOT use a fall-back since this function is called by alerts in order to find potential errors.
+  :return: Returns a tuple of (exit code, output, list of installed stack versions).
+  """
+  stack_selector_path = stack_tools.get_stack_tool_path(stack_tools.STACK_SELECTOR_NAME)
+  code, out = call((STACK_SELECT_PREFIX, stack_selector_path, 'versions'))
+  versions = []
+  if 0 == code:
+    for line in out.splitlines():
+      versions.append(line.rstrip('\n'))
+  return (code, out, versions)
 
 def get_stack_versions(stack_root):
   """
   Gets list of stack versions installed on the host.
-  Be default a call to <stack-selector-tool> versions is made to get the list 
of installed stack versions.
+  By default a call to <stack-selector-tool> versions is made to get the list 
of installed stack versions.
   As a fallback list of installed versions is collected from stack version 
directories in stack install root.
   :param stack_root: Stack install root
   :return: Returns list of installed stack versions.

http://git-wip-us.apache.org/repos/asf/ambari/blob/f18fad36/ambari-server/src/main/java/org/apache/ambari/server/checks/AtlasPresenceCheck.java
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/java/org/apache/ambari/server/checks/AtlasPresenceCheck.java
 
b/ambari-server/src/main/java/org/apache/ambari/server/checks/AtlasPresenceCheck.java
index 8556436..04b73fa 100644
--- 
a/ambari-server/src/main/java/org/apache/ambari/server/checks/AtlasPresenceCheck.java
+++ 
b/ambari-server/src/main/java/org/apache/ambari/server/checks/AtlasPresenceCheck.java
@@ -29,8 +29,8 @@ import org.slf4j.LoggerFactory;
 import com.google.inject.Singleton;
 
 /**
- * Checks if Atlas service is present. Upgrade to stack HDP 2.5 can't pursuit
- * with existed on the cluster Atlas service.
+ * Checks if Atlas service is present. An upgrade to stack HDP 2.5 from a
+ * previous stack requires that Atlas first be deleted from the cluster.
  */
 @Singleton
 @UpgradeCheck(group = UpgradeCheckGroup.DEFAULT)

http://git-wip-us.apache.org/repos/asf/ambari/blob/f18fad36/ambari-server/src/main/resources/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/alerts.json 
b/ambari-server/src/main/resources/alerts.json
index 2559b3a..d646401 100644
--- a/ambari-server/src/main/resources/alerts.json
+++ b/ambari-server/src/main/resources/alerts.json
@@ -179,6 +179,18 @@
             }
           ]
         }
+      },
+      {
+        "name": "ambari_agent_version_select",
+        "label": "Ambari Agent Distro/Conf Select Versions",
+        "description": "This host-level alert is triggered if the distro selector such as hdp-select cannot calculate versions available on this host. This may indicate that /usr/$stack/ directory has links/dirs that do not belong inside of it.",
+        "interval": 5,
+        "scope": "HOST",
+        "enabled": true,
+        "source": {
+          "type": "SCRIPT",
+          "path": "alert_version_select.py"
+        }
       }
 
     ]

http://git-wip-us.apache.org/repos/asf/ambari/blob/f18fad36/ambari-server/src/main/resources/host_scripts/alert_version_select.py
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/host_scripts/alert_version_select.py 
b/ambari-server/src/main/resources/host_scripts/alert_version_select.py
new file mode 100644
index 0000000..118911f
--- /dev/null
+++ b/ambari-server/src/main/resources/host_scripts/alert_version_select.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import os
+import logging
+import socket
+import json
+
+from resource_management.libraries.script.script import Script
+from resource_management.libraries.functions.stack_select import 
unsafe_get_stack_versions
+
+RESULT_STATE_OK = 'OK'  # alert states; execute returns one of these as the first tuple element
+RESULT_STATE_WARNING = 'WARNING'  # defined but not returned by execute in this script
+RESULT_STATE_CRITICAL = 'CRITICAL'
+RESULT_STATE_UNKNOWN = 'UNKNOWN'
+
+STACK_TOOLS = '{{cluster-env/stack_tools}}'  # token listed by get_tokens and looked up in execute
+
+
+logger = logging.getLogger()  # no name given, so this is the root logger
+
+
+def get_tokens():
+  """
+  Lists the {{site/property}} tokens whose values are needed to build the dictionary given to execute
+  """
+  required_tokens = (STACK_TOOLS,)
+  return required_tokens
+
+
+def execute(configurations={}, parameters={}, host_name=None):
+  """
+  Checks if the stack selector such as hdp-select can find versions installed on this host. E.g.,
+  hdp-select versions
+  Returns a tuple containing the result code and a pre-formatted result label
+
+  Keyword arguments:
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
+  host_name (string): the name of this host where the alert is running
+  """
+  msg = []
+  try:
+    if configurations is None:
+      return (RESULT_STATE_UNKNOWN, ['There were no configurations supplied to the script.'])
+
+    # Check required properties
+    if STACK_TOOLS not in configurations:
+      return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(STACK_TOOLS)])
+
+    # Of the form,
+    # { "stack_selector": ["hdp-select", "/usr/bin/hdp-select", "hdp-select"], "conf_selector": ["conf-select", "/usr/bin/conf-select", "conf-select"] }
+    stack_tools_str = configurations[STACK_TOOLS]
+
+    if stack_tools_str is None:
+      return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script and the value is null'.format(STACK_TOOLS)])
+
+    distro_select = "unknown-distro-select"
+    try:
+      stack_tools = json.loads(stack_tools_str)
+      distro_select = stack_tools["stack_selector"][0]
+    except Exception:
+      pass  # best effort: the name only decorates the OK message, so keep the placeholder
+
+    # This may not exist if the host does not contain any stack components,
+    # or only contains components like Ambari Metrics and SmartSense
+    stack_root_dir = Script.get_stack_root()
+
+    if os.path.isdir(stack_root_dir):
+      (code, out, versions) = unsafe_get_stack_versions()
+
+      if code == 0:
+        msg.append("Ok. {0}".format(distro_select))
+        if versions is not None and type(versions) is list and len(versions) > 0:
+          msg.append("Versions: {0}".format(", ".join(versions)))
+        return (RESULT_STATE_OK, ["\n".join(msg)])
+      else:
+        msg.append("Failed, check dir {0} for unexpected contents.".format(stack_root_dir))
+        if out is not None:
+          msg.append(out)
+
+        return (RESULT_STATE_CRITICAL, ["\n".join(msg)])
+    else:
+      msg.append("Ok. No stack root {0} to check.".format(stack_root_dir))
+      return (RESULT_STATE_OK, ["\n".join(msg)])
+  except Exception as e:
+    return (RESULT_STATE_CRITICAL, [str(e)])

http://git-wip-us.apache.org/repos/asf/ambari/blob/f18fad36/ambari-server/src/test/java/org/apache/ambari/server/api/services/AmbariMetaInfoTest.java
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/test/java/org/apache/ambari/server/api/services/AmbariMetaInfoTest.java
 
b/ambari-server/src/test/java/org/apache/ambari/server/api/services/AmbariMetaInfoTest.java
index f1af66f..9ff7def 100644
--- 
a/ambari-server/src/test/java/org/apache/ambari/server/api/services/AmbariMetaInfoTest.java
+++ 
b/ambari-server/src/test/java/org/apache/ambari/server/api/services/AmbariMetaInfoTest.java
@@ -1935,7 +1935,7 @@ public class AmbariMetaInfoTest {
 
     AlertDefinitionDAO dao = injector.getInstance(AlertDefinitionDAO.class);
     List<AlertDefinitionEntity> definitions = dao.findAll(clusterId);
-    assertEquals(11, definitions.size());
+    assertEquals(12, definitions.size());
 
     // figure out how many of these alerts were merged into from the
     // non-stack alerts.json
@@ -1947,7 +1947,7 @@ public class AmbariMetaInfoTest {
       }
     }
 
-    assertEquals(2, hostAlertCount);
+    assertEquals(3, hostAlertCount);
     assertEquals(9, definitions.size() - hostAlertCount);
 
     for (AlertDefinitionEntity definition : definitions) {
@@ -1958,7 +1958,7 @@ public class AmbariMetaInfoTest {
     metaInfo.reconcileAlertDefinitions(clusters);
 
     definitions = dao.findAll();
-    assertEquals(11, definitions.size());
+    assertEquals(12, definitions.size());
 
     for (AlertDefinitionEntity definition : definitions) {
       assertEquals(28, definition.getScheduleInterval().intValue());
@@ -1967,7 +1967,7 @@ public class AmbariMetaInfoTest {
     // find all enabled for the cluster should find 6 (the ones from HDFS;
     // it will not find the agent alert since it's not bound to the cluster)
     definitions = dao.findAllEnabled(cluster.getClusterId());
-    assertEquals(10, definitions.size());
+    assertEquals(11, definitions.size());
 
     // create new definition
     AlertDefinitionEntity entity = new AlertDefinitionEntity();
@@ -1986,19 +1986,19 @@ public class AmbariMetaInfoTest {
 
     // verify the new definition is found (6 HDFS + 1 new one)
     definitions = dao.findAllEnabled(cluster.getClusterId());
-    assertEquals(11, definitions.size());
+    assertEquals(12, definitions.size());
 
     // reconcile, which should disable our bad definition
     metaInfo.reconcileAlertDefinitions(clusters);
 
     // find all enabled for the cluster should find 6
     definitions = dao.findAllEnabled(cluster.getClusterId());
-    assertEquals(10, definitions.size());
+    assertEquals(11, definitions.size());
 
     // find all should find 6 HDFS + 1 disabled + 1 agent alert + 2 server
     // alerts
     definitions = dao.findAll();
-    assertEquals(12, definitions.size());
+    assertEquals(13, definitions.size());
 
     entity = dao.findById(entity.getDefinitionId());
     assertFalse(entity.getEnabled());

http://git-wip-us.apache.org/repos/asf/ambari/blob/f18fad36/ambari-server/src/test/java/org/apache/ambari/server/metadata/AgentAlertDefinitionsTest.java
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/test/java/org/apache/ambari/server/metadata/AgentAlertDefinitionsTest.java
 
b/ambari-server/src/test/java/org/apache/ambari/server/metadata/AgentAlertDefinitionsTest.java
index cbc5e69..7378b8c 100644
--- 
a/ambari-server/src/test/java/org/apache/ambari/server/metadata/AgentAlertDefinitionsTest.java
+++ 
b/ambari-server/src/test/java/org/apache/ambari/server/metadata/AgentAlertDefinitionsTest.java
@@ -60,7 +60,7 @@ public class AgentAlertDefinitionsTest {
   public void testLoadingAgentHostAlerts() {
     AmbariServiceAlertDefinitions ambariServiceAlertDefinitions = 
m_injector.getInstance(AmbariServiceAlertDefinitions.class);
     List<AlertDefinition> definitions = 
ambariServiceAlertDefinitions.getAgentDefinitions();
-    Assert.assertEquals(2, definitions.size());
+    Assert.assertEquals(3, definitions.size());
 
     for( AlertDefinition definition : definitions){
       Assert.assertEquals(Components.AMBARI_AGENT.name(),

Reply via email to