Repository: ambari
Updated Branches:
  refs/heads/branch-2.1 2daebc09f -> 8ef6b6063


http://git-wip-us.apache.org/repos/asf/ambari/blob/8ef6b606/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/service-metrics/YARN.txt
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/service-metrics/YARN.txt
 
b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/service-metrics/YARN.txt
new file mode 100644
index 0000000..ce04228
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/service-metrics/YARN.txt
@@ -0,0 +1,178 @@
+jvm.JvmMetrics.GcCount
+jvm.JvmMetrics.GcCountPS
+jvm.JvmMetrics.GcTimeMillis
+jvm.JvmMetrics.GcTimeMillisPS
+jvm.JvmMetrics.LogError
+jvm.JvmMetrics.LogFatal
+jvm.JvmMetrics.LogInfo
+jvm.JvmMetrics.LogWarn
+jvm.JvmMetrics.MemHeapCommittedM
+jvm.JvmMetrics.MemHeapMaxM
+jvm.JvmMetrics.MemHeapUsedM
+jvm.JvmMetrics.MemMaxM
+jvm.JvmMetrics.MemNonHeapCommittedM
+jvm.JvmMetrics.MemNonHeapMaxM
+jvm.JvmMetrics.MemNonHeapUsedM
+jvm.JvmMetrics.ThreadsBlocked
+jvm.JvmMetrics.ThreadsNew
+jvm.JvmMetrics.ThreadsRunnable
+jvm.JvmMetrics.ThreadsTerminated
+jvm.JvmMetrics.ThreadsTimedWaiting
+jvm.JvmMetrics.ThreadsWaiting
+mapred.ShuffleMetrics.ShuffleConnections
+mapred.ShuffleMetrics.ShuffleOutputBytes
+mapred.ShuffleMetrics.ShuffleOutputsFailed
+mapred.ShuffleMetrics.ShuffleOutputsOK
+metricssystem.MetricsSystem.DroppedPubAll
+metricssystem.MetricsSystem.NumActiveSinks
+metricssystem.MetricsSystem.NumActiveSources
+metricssystem.MetricsSystem.NumAllSinks
+metricssystem.MetricsSystem.NumAllSources
+metricssystem.MetricsSystem.PublishAvgTime
+metricssystem.MetricsSystem.PublishNumOps
+metricssystem.MetricsSystem.Sink_timelineAvgTime
+metricssystem.MetricsSystem.Sink_timelineDropped
+metricssystem.MetricsSystem.Sink_timelineNumOps
+metricssystem.MetricsSystem.Sink_timelineQsize
+metricssystem.MetricsSystem.SnapshotAvgTime
+metricssystem.MetricsSystem.SnapshotNumOps
+rpc.rpc.CallQueueLength
+rpc.rpc.NumOpenConnections
+rpc.rpc.ReceivedBytes
+rpc.rpc.RpcAuthenticationFailures
+rpc.rpc.RpcAuthenticationSuccesses
+rpc.rpc.RpcAuthorizationFailures
+rpc.rpc.RpcAuthorizationSuccesses
+rpc.rpc.RpcClientBackoff
+rpc.rpc.RpcProcessingTimeAvgTime
+rpc.rpc.RpcProcessingTimeNumOps
+rpc.rpc.RpcQueueTimeAvgTime
+rpc.rpc.RpcQueueTimeNumOps
+rpc.rpc.RpcSlowCalls
+rpc.rpc.SentBytes
+rpcdetailed.rpcdetailed.AllocateAvgTime
+rpcdetailed.rpcdetailed.AllocateNumOps
+rpcdetailed.rpcdetailed.FinishApplicationMasterAvgTime
+rpcdetailed.rpcdetailed.FinishApplicationMasterNumOps
+rpcdetailed.rpcdetailed.GetApplicationReportAvgTime
+rpcdetailed.rpcdetailed.GetApplicationReportNumOps
+rpcdetailed.rpcdetailed.GetClusterMetricsAvgTime
+rpcdetailed.rpcdetailed.GetClusterMetricsNumOps
+rpcdetailed.rpcdetailed.GetClusterNodesAvgTime
+rpcdetailed.rpcdetailed.GetClusterNodesNumOps
+rpcdetailed.rpcdetailed.GetContainerStatusesAvgTime
+rpcdetailed.rpcdetailed.GetContainerStatusesNumOps
+rpcdetailed.rpcdetailed.GetNewApplicationAvgTime
+rpcdetailed.rpcdetailed.GetNewApplicationNumOps
+rpcdetailed.rpcdetailed.GetQueueInfoAvgTime
+rpcdetailed.rpcdetailed.GetQueueInfoNumOps
+rpcdetailed.rpcdetailed.GetQueueUserAclsAvgTime
+rpcdetailed.rpcdetailed.GetQueueUserAclsNumOps
+rpcdetailed.rpcdetailed.HeartbeatAvgTime
+rpcdetailed.rpcdetailed.HeartbeatNumOps
+rpcdetailed.rpcdetailed.NodeHeartbeatAvgTime
+rpcdetailed.rpcdetailed.NodeHeartbeatNumOps
+rpcdetailed.rpcdetailed.RegisterApplicationMasterAvgTime
+rpcdetailed.rpcdetailed.RegisterApplicationMasterNumOps
+rpcdetailed.rpcdetailed.RegisterNodeManagerAvgTime
+rpcdetailed.rpcdetailed.RegisterNodeManagerNumOps
+rpcdetailed.rpcdetailed.StartContainersAvgTime
+rpcdetailed.rpcdetailed.StartContainersNumOps
+rpcdetailed.rpcdetailed.StopContainersAvgTime
+rpcdetailed.rpcdetailed.StopContainersNumOps
+rpcdetailed.rpcdetailed.SubmitApplicationAvgTime
+rpcdetailed.rpcdetailed.SubmitApplicationNumOps
+ugi.UgiMetrics.GetGroupsAvgTime
+ugi.UgiMetrics.GetGroupsNumOps
+ugi.UgiMetrics.LoginFailureAvgTime
+ugi.UgiMetrics.LoginFailureNumOps
+ugi.UgiMetrics.LoginSuccessAvgTime
+ugi.UgiMetrics.LoginSuccessNumOps
+yarn.ClusterMetrics.AMLaunchDelayAvgTime
+yarn.ClusterMetrics.AMLaunchDelayNumOps
+yarn.ClusterMetrics.AMRegisterDelayAvgTime
+yarn.ClusterMetrics.AMRegisterDelayNumOps
+yarn.ClusterMetrics.NumActiveNMs
+yarn.ClusterMetrics.NumDecommissionedNMs
+yarn.ClusterMetrics.NumLostNMs
+yarn.ClusterMetrics.NumRebootedNMs
+yarn.ClusterMetrics.NumUnhealthyNMs
+yarn.NodeManagerMetrics.AllocatedContainers
+yarn.NodeManagerMetrics.AllocatedGB
+yarn.NodeManagerMetrics.AllocatedVCores
+yarn.NodeManagerMetrics.AvailableGB
+yarn.NodeManagerMetrics.AvailableVCores
+yarn.NodeManagerMetrics.BadLocalDirs
+yarn.NodeManagerMetrics.BadLogDirs
+yarn.NodeManagerMetrics.ContainerLaunchDurationAvgTime
+yarn.NodeManagerMetrics.ContainerLaunchDurationNumOps
+yarn.NodeManagerMetrics.ContainersCompleted
+yarn.NodeManagerMetrics.ContainersFailed
+yarn.NodeManagerMetrics.ContainersIniting
+yarn.NodeManagerMetrics.ContainersKilled
+yarn.NodeManagerMetrics.ContainersLaunched
+yarn.NodeManagerMetrics.ContainersRunning
+yarn.NodeManagerMetrics.GoodLocalDirsDiskUtilizationPerc
+yarn.NodeManagerMetrics.GoodLogDirsDiskUtilizationPerc
+yarn.QueueMetrics.Queue=root.AMResourceLimitMB
+yarn.QueueMetrics.Queue=root.AMResourceLimitVCores
+yarn.QueueMetrics.Queue=root.ActiveApplications
+yarn.QueueMetrics.Queue=root.ActiveUsers
+yarn.QueueMetrics.Queue=root.AggregateContainersAllocated
+yarn.QueueMetrics.Queue=root.AggregateContainersReleased
+yarn.QueueMetrics.Queue=root.AllocatedContainers
+yarn.QueueMetrics.Queue=root.AllocatedMB
+yarn.QueueMetrics.Queue=root.AllocatedVCores
+yarn.QueueMetrics.Queue=root.AppAttemptFirstContainerAllocationDelayAvgTime
+yarn.QueueMetrics.Queue=root.AppAttemptFirstContainerAllocationDelayNumOps
+yarn.QueueMetrics.Queue=root.AppsCompleted
+yarn.QueueMetrics.Queue=root.AppsFailed
+yarn.QueueMetrics.Queue=root.AppsKilled
+yarn.QueueMetrics.Queue=root.AppsPending
+yarn.QueueMetrics.Queue=root.AppsRunning
+yarn.QueueMetrics.Queue=root.AppsSubmitted
+yarn.QueueMetrics.Queue=root.AvailableMB
+yarn.QueueMetrics.Queue=root.AvailableVCores
+yarn.QueueMetrics.Queue=root.PendingContainers
+yarn.QueueMetrics.Queue=root.PendingMB
+yarn.QueueMetrics.Queue=root.PendingVCores
+yarn.QueueMetrics.Queue=root.ReservedContainers
+yarn.QueueMetrics.Queue=root.ReservedMB
+yarn.QueueMetrics.Queue=root.ReservedVCores
+yarn.QueueMetrics.Queue=root.UsedAMResourceMB
+yarn.QueueMetrics.Queue=root.UsedAMResourceVCores
+yarn.QueueMetrics.Queue=root.default.AMResourceLimitMB
+yarn.QueueMetrics.Queue=root.default.AMResourceLimitVCores
+yarn.QueueMetrics.Queue=root.default.ActiveApplications
+yarn.QueueMetrics.Queue=root.default.ActiveUsers
+yarn.QueueMetrics.Queue=root.default.AggregateContainersAllocated
+yarn.QueueMetrics.Queue=root.default.AggregateContainersReleased
+yarn.QueueMetrics.Queue=root.default.AllocatedContainers
+yarn.QueueMetrics.Queue=root.default.AllocatedMB
+yarn.QueueMetrics.Queue=root.default.AllocatedVCores
+yarn.QueueMetrics.Queue=root.default.AppAttemptFirstContainerAllocationDelayAvgTime
+yarn.QueueMetrics.Queue=root.default.AppAttemptFirstContainerAllocationDelayNumOps
+yarn.QueueMetrics.Queue=root.default.AppsCompleted
+yarn.QueueMetrics.Queue=root.default.AppsFailed
+yarn.QueueMetrics.Queue=root.default.AppsKilled
+yarn.QueueMetrics.Queue=root.default.AppsPending
+yarn.QueueMetrics.Queue=root.default.AppsRunning
+yarn.QueueMetrics.Queue=root.default.AppsSubmitted
+yarn.QueueMetrics.Queue=root.default.AvailableMB
+yarn.QueueMetrics.Queue=root.default.AvailableVCores
+yarn.QueueMetrics.Queue=root.default.PendingContainers
+yarn.QueueMetrics.Queue=root.default.PendingMB
+yarn.QueueMetrics.Queue=root.default.PendingVCores
+yarn.QueueMetrics.Queue=root.default.ReservedContainers
+yarn.QueueMetrics.Queue=root.default.ReservedMB
+yarn.QueueMetrics.Queue=root.default.ReservedVCores
+yarn.QueueMetrics.Queue=root.default.UsedAMResourceMB
+yarn.QueueMetrics.Queue=root.default.UsedAMResourceVCores
+yarn.QueueMetrics.Queue=root.default.running_0
+yarn.QueueMetrics.Queue=root.default.running_1440
+yarn.QueueMetrics.Queue=root.default.running_300
+yarn.QueueMetrics.Queue=root.default.running_60
+yarn.QueueMetrics.Queue=root.running_0
+yarn.QueueMetrics.Queue=root.running_1440
+yarn.QueueMetrics.Queue=root.running_300
+yarn.QueueMetrics.Queue=root.running_60

http://git-wip-us.apache.org/repos/asf/ambari/blob/8ef6b606/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/split_points.py
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/split_points.py
 
b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/split_points.py
new file mode 100644
index 0000000..910bde3
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/split_points.py
@@ -0,0 +1,210 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import os
+import sys
+import re
+import math
+import collections
+import ast
+
+# Extension of the per-service metric list files; it is removed from a file
+# name before the name is compared against the selected services.
+metric_filename_ext = '.txt'
+# 5 regions for higher order aggregate tables
+other_region_static_count = 5
+
+# Byte multipliers for the size suffixes (b, k, m, g, t, p) understood by
+# format_Xmx_size_to_bytes.
+b_bytes = 1
+k_bytes = 1 << 10  # 1024
+m_bytes = 1 << 20  # 1024^2
+g_bytes = 1 << 30  # 1024^3
+t_bytes = 1 << 40  # 1024^4
+p_bytes = 1 << 50  # 1024^5
+
+def to_number(s):
+  """
+  Return the integer formed by the digits found in a string.
+
+  Every non-digit character is dropped before the conversion, so "512m"
+  yields 512 and "1g2" yields 12. A sign, if present, is dropped as well.
+
+  :param s: string to parse, e.g. a heap size such as "512m"
+  :return: the digits of s as an int, or None when s contains no digit
+  """
+  try:
+    # Raw string: "\D" in a plain literal is an invalid escape sequence.
+    return int(re.sub(r"\D", "", s))
+  except ValueError:
+    return None
+
+def format_Xmx_size_to_bytes(value):
+  """
+  Convert a JVM-style size setting such as "512m" or "2G" to bytes.
+
+  The last character selects the unit (b, k, m, g, t or p, case
+  insensitive); a value that ends in a digit is taken to be in bytes.
+  Surrounding whitespace is ignored.
+
+  :param value: size setting; anything accepted by str()
+  :return: size in bytes, 0 for an empty value
+  :raise ValueError: if the unit suffix is unknown or the value has no digits
+  """
+  strvalue = str(value).strip().lower()
+  if len(strvalue) == 0:
+    return 0
+
+  multipliers = {
+    'b': b_bytes,
+    'k': k_bytes,
+    'm': m_bytes,
+    'g': g_bytes,
+    't': t_bytes,
+    'p': p_bytes
+  }
+
+  modifier = strvalue[-1]
+  if modifier in "0123456789":
+    # No unit suffix given: plain bytes
+    modifier = 'b'
+  if modifier not in multipliers:
+    raise ValueError("Unknown size suffix '%s' in '%s'" % (modifier, value))
+
+  number = to_number(strvalue)
+  if number is None:
+    # to_number found no digits; fail clearly instead of multiplying None
+    raise ValueError("No numeric part found in '%s'" % value)
+  return number * multipliers[modifier]
+
+# Class that takes AMS HBase configs as input and determines the Region
+# pre-splits based on selected services also passed as a parameter to the
+# class.
+class FindSplitPointsForAMSRegions():
+  """
+  Determines region pre-split points for the AMS precision and aggregate
+  tables.
+
+  The number of regions per table is derived from the heap and memstore
+  settings in the AMS HBase configs; the split points themselves are metric
+  names picked at even intervals from the sorted set of metrics listed in
+  the files of serviceMetricsDir.
+  """
+
+  # Result type of get_split_points(): two lists of metric names.
+  SplitPoints = collections.namedtuple('SplitPoints', [ 'precision', 'aggregate' ])
+
+  def __init__(self, ams_hbase_site, ams_hbase_env, serviceMetricsDir,
+               operation_mode = 'embedded', services = None):
+    """
+    :param ams_hbase_site: dict of ams-hbase-site properties
+    :param ams_hbase_env: dict of ams-hbase-env properties
+    :param serviceMetricsDir: directory holding one metric list file per service
+    :param operation_mode: 'embedded' or 'distributed'
+    :param services: collection of service names to consider, None for all
+    """
+    self.ams_hbase_site = ams_hbase_site
+    self.ams_hbase_env = ams_hbase_env
+    self.serviceMetricsDir = serviceMetricsDir
+    self.services = services
+    self.mode = operation_mode
+    # Initialize before use
+    self.initialize()
+
+  def initialize(self):
+    # calculate regions based on available memory
+    self.initialize_region_counts()
+    self.initialize_ordered_set_of_metrics()
+
+  def initialize_region_counts(self):
+    """
+    Set desired_precision_region_count and desired_aggregate_region_count
+    from the configured heap sizes and memstore settings. Both stay at 1
+    when the configs cannot be evaluated.
+    """
+    # Defaults first, so that get_split_points() always finds both attributes
+    # even when the configs below turn out to be unusable.
+    self.desired_precision_region_count = 1
+    self.desired_aggregate_region_count = 1
+
+    try:
+      xmx_master_bytes = format_Xmx_size_to_bytes(self.ams_hbase_env['hbase_master_heapsize'])
+      xmx_region_bytes = format_Xmx_size_to_bytes(self.ams_hbase_env['hbase_regionserver_heapsize'])
+      if self.mode == 'distributed':
+        xmx_bytes = xmx_region_bytes
+      else:
+        xmx_bytes = xmx_master_bytes + xmx_region_bytes
+
+      memstore_max_mem = float(self.ams_hbase_site['hbase.regionserver.global.memstore.upperLimit']) * xmx_bytes
+      memstore_flush_size = format_Xmx_size_to_bytes(self.ams_hbase_site['hbase.hregion.memstore.flush.size'])
+
+      max_inmemory_regions = (memstore_max_mem / memstore_flush_size) - other_region_static_count
+    except Exception:
+      # Best effort: missing keys, unparsable values or a zero flush size
+      # all leave the single-region defaults in place.
+      print('Bad config settings, could not calculate max regions available.')
+      return
+
+    print('max_inmemory_regions: %s' % max_inmemory_regions)
+
+    if max_inmemory_regions > 2:
+      # Lets say total = 12, so we have 7 regions to allocate between
+      # METRIC_RECORD and METRIC_AGGREGATE tables, desired = (5, 2)
+      self.desired_precision_region_count = int(math.floor(0.8 * max_inmemory_regions))
+      self.desired_aggregate_region_count = int(max_inmemory_regions - self.desired_precision_region_count)
+
+  def initialize_ordered_set_of_metrics(self):
+    """
+    Read the metric list files of the selected services and store the
+    sorted, de-duplicated metric names in self.metrics.
+    """
+    metrics = set()
+
+    for file_name in sorted(os.listdir(self.serviceMetricsDir)):
+      file_path = os.path.join(self.serviceMetricsDir, file_name)
+      if not os.path.isfile(file_path):
+        continue
+
+      # Remove the extension as a suffix; str.rstrip() would strip any
+      # trailing '.', 't' or 'x' characters of the service name as well.
+      service_name = file_name
+      if service_name.endswith(metric_filename_ext):
+        service_name = service_name[:-len(metric_filename_ext)]
+
+      # Process for services selected at deploy time or all services if
+      # services arg is not passed
+      if self.services is None or service_name in self.services:
+        print('Processing file: %s' % file_path)
+        with open(file_path, 'r') as f:
+          for line in f:
+            metric = line.strip()
+            # Blank lines are not metrics
+            if metric:
+              metrics.add(metric)
+
+    self.metrics = sorted(metrics)
+    print('metrics length: %s' % len(self.metrics))
+
+  @staticmethod
+  def _pick_split_points(metric_list, region_count):
+    # Pick up to region_count - 1 metric names at even intervals.
+    metrics_total = len(metric_list)
+    if region_count <= 1 or metrics_total == 0:
+      return []
+    # float() forces true division so that ceil() really rounds up; the step
+    # is then at least 1 and the same metric can never be picked twice.
+    step = int(math.ceil(metrics_total / float(region_count)))
+    # Candidates sit at step, 2 * step, ... and never include the last metric.
+    return metric_list[step:metrics_total - 1:step][:region_count - 1]
+
+  def get_split_points(self):
+    """
+    :return: SplitPoints tuple whose 'precision' and 'aggregate' fields are
+             lists of metric names to pre-split the respective table on
+    """
+    metric_list = list(self.metrics)
+
+    print('desired_precision_region_count: %s' % self.desired_precision_region_count)
+    print('desired_aggregate_region_count: %s' % self.desired_aggregate_region_count)
+
+    return self.SplitPoints(
+      precision = self._pick_split_points(metric_list, self.desired_precision_region_count),
+      aggregate = self._pick_split_points(metric_list, self.desired_aggregate_region_count))
+
+def main(argv = None):
+  """
+  Command line entry point: print the split points for the AMS tables.
+
+  Expects three arguments after the script name: a dict literal with the
+  ams-hbase-site properties, a dict literal with the ams-hbase-env
+  properties and a comma separated list of services.
+
+  :param argv: argument vector including the script path, defaults to sys.argv
+  :return: 0 on success, 1 if the service-metrics directory is missing;
+           exits with status 2 on bad arguments
+  """
+  if argv is None:
+    argv = sys.argv
+
+  scriptDir = os.path.realpath(os.path.dirname(argv[0]))
+  serviceMetricsDir = os.path.join(scriptDir, 'service-metrics')
+  if not os.path.exists(serviceMetricsDir):
+    print('Cannot find service metrics dir in %s' % scriptDir)
+    return 1
+
+  onlyargs = argv[1:]
+  if len(onlyargs) < 3:
+    sys.stderr.write("Usage: dict(ams-hbase-site) dict(ams-hbase-env) list(services)\n")
+    sys.exit(2)
+
+  ams_hbase_site = None
+  ams_hbase_env = None
+  services = None
+  try:
+    # literal_eval only accepts Python literals, it does not execute code
+    ams_hbase_site = ast.literal_eval(str(onlyargs[0]))
+    ams_hbase_env = ast.literal_eval(str(onlyargs[1]))
+    if not isinstance(ams_hbase_site, dict) or not isinstance(ams_hbase_env, dict):
+      raise ValueError("ams-hbase-site and ams-hbase-env must be dict literals")
+    services = onlyargs[2]
+    if services:
+      services = str(services).split(',')
+  except Exception as ex:
+    sys.stderr.write(str(ex))
+    sys.stderr.write("\nUsage: Expected items not found in input. Found "
+                    " ams-hbase-site => {0}, ams-hbase-env => {1},"
+                    " services => {2}".format(ams_hbase_site, ams_hbase_env, services))
+    sys.exit(2)
+
+  print('--------- AMS Regions Split point finder ---------')
+  print('Services: %s' % services)
+
+  # A root dir that mentions hdfs selects distributed mode
+  mode = 'distributed' if 'hbase.rootdir' in ams_hbase_site and \
+                          'hdfs' in ams_hbase_site['hbase.rootdir'] else \
+                          'embedded'
+
+  split_point_finder = FindSplitPointsForAMSRegions(
+    ams_hbase_site, ams_hbase_env, serviceMetricsDir, mode, services)
+
+  result = split_point_finder.get_split_points()
+  print('Split points for precision table : %s' % len(result.precision))
+  print('precision: %s' % str(result.precision))
+  print('Split points for aggregate table : %s' % len(result.aggregate))
+  print('aggregate: %s' % str(result.aggregate))
+
+  return 0
+
+if __name__ == '__main__':
+  # Hand the result to the shell so that a failure is visible to the caller
+  sys.exit(main(sys.argv))
+

Reply via email to