Repository: ambari
Updated Branches:
  refs/heads/branch-2.1 2daebc09f -> 8ef6b6063

http://git-wip-us.apache.org/repos/asf/ambari/blob/8ef6b606/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/service-metrics/YARN.txt
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/service-metrics/YARN.txt b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/service-metrics/YARN.txt
new file mode 100644
index 0000000..ce04228
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/service-metrics/YARN.txt
@@ -0,0 +1,178 @@
+jvm.JvmMetrics.GcCount
+jvm.JvmMetrics.GcCountPS
+jvm.JvmMetrics.GcTimeMillis
+jvm.JvmMetrics.GcTimeMillisPS
+jvm.JvmMetrics.LogError
+jvm.JvmMetrics.LogFatal
+jvm.JvmMetrics.LogInfo
+jvm.JvmMetrics.LogWarn
+jvm.JvmMetrics.MemHeapCommittedM
+jvm.JvmMetrics.MemHeapMaxM
+jvm.JvmMetrics.MemHeapUsedM
+jvm.JvmMetrics.MemMaxM
+jvm.JvmMetrics.MemNonHeapCommittedM
+jvm.JvmMetrics.MemNonHeapMaxM
+jvm.JvmMetrics.MemNonHeapUsedM
+jvm.JvmMetrics.ThreadsBlocked
+jvm.JvmMetrics.ThreadsNew
+jvm.JvmMetrics.ThreadsRunnable
+jvm.JvmMetrics.ThreadsTerminated
+jvm.JvmMetrics.ThreadsTimedWaiting
+jvm.JvmMetrics.ThreadsWaiting
+mapred.ShuffleMetrics.ShuffleConnections
+mapred.ShuffleMetrics.ShuffleOutputBytes
+mapred.ShuffleMetrics.ShuffleOutputsFailed
+mapred.ShuffleMetrics.ShuffleOutputsOK
+metricssystem.MetricsSystem.DroppedPubAll
+metricssystem.MetricsSystem.NumActiveSinks
+metricssystem.MetricsSystem.NumActiveSources
+metricssystem.MetricsSystem.NumAllSinks
+metricssystem.MetricsSystem.NumAllSources
+metricssystem.MetricsSystem.PublishAvgTime
+metricssystem.MetricsSystem.PublishNumOps
+metricssystem.MetricsSystem.Sink_timelineAvgTime
+metricssystem.MetricsSystem.Sink_timelineDropped
+metricssystem.MetricsSystem.Sink_timelineNumOps
+metricssystem.MetricsSystem.Sink_timelineQsize
+metricssystem.MetricsSystem.SnapshotAvgTime
+metricssystem.MetricsSystem.SnapshotNumOps
+rpc.rpc.CallQueueLength
+rpc.rpc.NumOpenConnections
+rpc.rpc.ReceivedBytes
+rpc.rpc.RpcAuthenticationFailures
+rpc.rpc.RpcAuthenticationSuccesses
+rpc.rpc.RpcAuthorizationFailures
+rpc.rpc.RpcAuthorizationSuccesses
+rpc.rpc.RpcClientBackoff
+rpc.rpc.RpcProcessingTimeAvgTime
+rpc.rpc.RpcProcessingTimeNumOps
+rpc.rpc.RpcQueueTimeAvgTime
+rpc.rpc.RpcQueueTimeNumOps
+rpc.rpc.RpcSlowCalls
+rpc.rpc.SentBytes
+rpcdetailed.rpcdetailed.AllocateAvgTime
+rpcdetailed.rpcdetailed.AllocateNumOps
+rpcdetailed.rpcdetailed.FinishApplicationMasterAvgTime
+rpcdetailed.rpcdetailed.FinishApplicationMasterNumOps
+rpcdetailed.rpcdetailed.GetApplicationReportAvgTime
+rpcdetailed.rpcdetailed.GetApplicationReportNumOps
+rpcdetailed.rpcdetailed.GetClusterMetricsAvgTime
+rpcdetailed.rpcdetailed.GetClusterMetricsNumOps
+rpcdetailed.rpcdetailed.GetClusterNodesAvgTime
+rpcdetailed.rpcdetailed.GetClusterNodesNumOps
+rpcdetailed.rpcdetailed.GetContainerStatusesAvgTime
+rpcdetailed.rpcdetailed.GetContainerStatusesNumOps
+rpcdetailed.rpcdetailed.GetNewApplicationAvgTime
+rpcdetailed.rpcdetailed.GetNewApplicationNumOps
+rpcdetailed.rpcdetailed.GetQueueInfoAvgTime
+rpcdetailed.rpcdetailed.GetQueueInfoNumOps
+rpcdetailed.rpcdetailed.GetQueueUserAclsAvgTime
+rpcdetailed.rpcdetailed.GetQueueUserAclsNumOps
+rpcdetailed.rpcdetailed.HeartbeatAvgTime
+rpcdetailed.rpcdetailed.HeartbeatNumOps
+rpcdetailed.rpcdetailed.NodeHeartbeatAvgTime
+rpcdetailed.rpcdetailed.NodeHeartbeatNumOps
+rpcdetailed.rpcdetailed.RegisterApplicationMasterAvgTime
+rpcdetailed.rpcdetailed.RegisterApplicationMasterNumOps
+rpcdetailed.rpcdetailed.RegisterNodeManagerAvgTime
+rpcdetailed.rpcdetailed.RegisterNodeManagerNumOps
+rpcdetailed.rpcdetailed.StartContainersAvgTime
+rpcdetailed.rpcdetailed.StartContainersNumOps
+rpcdetailed.rpcdetailed.StopContainersAvgTime
+rpcdetailed.rpcdetailed.StopContainersNumOps
+rpcdetailed.rpcdetailed.SubmitApplicationAvgTime
+rpcdetailed.rpcdetailed.SubmitApplicationNumOps
+ugi.UgiMetrics.GetGroupsAvgTime
+ugi.UgiMetrics.GetGroupsNumOps
+ugi.UgiMetrics.LoginFailureAvgTime
+ugi.UgiMetrics.LoginFailureNumOps
+ugi.UgiMetrics.LoginSuccessAvgTime
+ugi.UgiMetrics.LoginSuccessNumOps
+yarn.ClusterMetrics.AMLaunchDelayAvgTime
+yarn.ClusterMetrics.AMLaunchDelayNumOps
+yarn.ClusterMetrics.AMRegisterDelayAvgTime
+yarn.ClusterMetrics.AMRegisterDelayNumOps
+yarn.ClusterMetrics.NumActiveNMs
+yarn.ClusterMetrics.NumDecommissionedNMs
+yarn.ClusterMetrics.NumLostNMs
+yarn.ClusterMetrics.NumRebootedNMs
+yarn.ClusterMetrics.NumUnhealthyNMs
+yarn.NodeManagerMetrics.AllocatedContainers
+yarn.NodeManagerMetrics.AllocatedGB
+yarn.NodeManagerMetrics.AllocatedVCores
+yarn.NodeManagerMetrics.AvailableGB
+yarn.NodeManagerMetrics.AvailableVCores
+yarn.NodeManagerMetrics.BadLocalDirs
+yarn.NodeManagerMetrics.BadLogDirs
+yarn.NodeManagerMetrics.ContainerLaunchDurationAvgTime
+yarn.NodeManagerMetrics.ContainerLaunchDurationNumOps
+yarn.NodeManagerMetrics.ContainersCompleted
+yarn.NodeManagerMetrics.ContainersFailed
+yarn.NodeManagerMetrics.ContainersIniting
+yarn.NodeManagerMetrics.ContainersKilled
+yarn.NodeManagerMetrics.ContainersLaunched
+yarn.NodeManagerMetrics.ContainersRunning
+yarn.NodeManagerMetrics.GoodLocalDirsDiskUtilizationPerc
+yarn.NodeManagerMetrics.GoodLogDirsDiskUtilizationPerc
+yarn.QueueMetrics.Queue=root.AMResourceLimitMB
+yarn.QueueMetrics.Queue=root.AMResourceLimitVCores
+yarn.QueueMetrics.Queue=root.ActiveApplications
+yarn.QueueMetrics.Queue=root.ActiveUsers
+yarn.QueueMetrics.Queue=root.AggregateContainersAllocated
+yarn.QueueMetrics.Queue=root.AggregateContainersReleased
+yarn.QueueMetrics.Queue=root.AllocatedContainers
+yarn.QueueMetrics.Queue=root.AllocatedMB
+yarn.QueueMetrics.Queue=root.AllocatedVCores
+yarn.QueueMetrics.Queue=root.AppAttemptFirstContainerAllocationDelayAvgTime
+yarn.QueueMetrics.Queue=root.AppAttemptFirstContainerAllocationDelayNumOps
+yarn.QueueMetrics.Queue=root.AppsCompleted
+yarn.QueueMetrics.Queue=root.AppsFailed
+yarn.QueueMetrics.Queue=root.AppsKilled
+yarn.QueueMetrics.Queue=root.AppsPending
+yarn.QueueMetrics.Queue=root.AppsRunning
+yarn.QueueMetrics.Queue=root.AppsSubmitted
+yarn.QueueMetrics.Queue=root.AvailableMB
+yarn.QueueMetrics.Queue=root.AvailableVCores
+yarn.QueueMetrics.Queue=root.PendingContainers
+yarn.QueueMetrics.Queue=root.PendingMB
+yarn.QueueMetrics.Queue=root.PendingVCores
+yarn.QueueMetrics.Queue=root.ReservedContainers
+yarn.QueueMetrics.Queue=root.ReservedMB
+yarn.QueueMetrics.Queue=root.ReservedVCores
+yarn.QueueMetrics.Queue=root.UsedAMResourceMB
+yarn.QueueMetrics.Queue=root.UsedAMResourceVCores
+yarn.QueueMetrics.Queue=root.default.AMResourceLimitMB
+yarn.QueueMetrics.Queue=root.default.AMResourceLimitVCores
+yarn.QueueMetrics.Queue=root.default.ActiveApplications
+yarn.QueueMetrics.Queue=root.default.ActiveUsers
+yarn.QueueMetrics.Queue=root.default.AggregateContainersAllocated
+yarn.QueueMetrics.Queue=root.default.AggregateContainersReleased
+yarn.QueueMetrics.Queue=root.default.AllocatedContainers
+yarn.QueueMetrics.Queue=root.default.AllocatedMB
+yarn.QueueMetrics.Queue=root.default.AllocatedVCores
+yarn.QueueMetrics.Queue=root.default.AppAttemptFirstContainerAllocationDelayAvgTime
+yarn.QueueMetrics.Queue=root.default.AppAttemptFirstContainerAllocationDelayNumOps
+yarn.QueueMetrics.Queue=root.default.AppsCompleted
+yarn.QueueMetrics.Queue=root.default.AppsFailed
+yarn.QueueMetrics.Queue=root.default.AppsKilled
+yarn.QueueMetrics.Queue=root.default.AppsPending
+yarn.QueueMetrics.Queue=root.default.AppsRunning
+yarn.QueueMetrics.Queue=root.default.AppsSubmitted
+yarn.QueueMetrics.Queue=root.default.AvailableMB
+yarn.QueueMetrics.Queue=root.default.AvailableVCores
+yarn.QueueMetrics.Queue=root.default.PendingContainers
+yarn.QueueMetrics.Queue=root.default.PendingMB
+yarn.QueueMetrics.Queue=root.default.PendingVCores
+yarn.QueueMetrics.Queue=root.default.ReservedContainers
+yarn.QueueMetrics.Queue=root.default.ReservedMB
+yarn.QueueMetrics.Queue=root.default.ReservedVCores
+yarn.QueueMetrics.Queue=root.default.UsedAMResourceMB
+yarn.QueueMetrics.Queue=root.default.UsedAMResourceVCores
+yarn.QueueMetrics.Queue=root.default.running_0
+yarn.QueueMetrics.Queue=root.default.running_1440
+yarn.QueueMetrics.Queue=root.default.running_300
+yarn.QueueMetrics.Queue=root.default.running_60
+yarn.QueueMetrics.Queue=root.running_0
+yarn.QueueMetrics.Queue=root.running_1440
+yarn.QueueMetrics.Queue=root.running_300
+yarn.QueueMetrics.Queue=root.running_60

http://git-wip-us.apache.org/repos/asf/ambari/blob/8ef6b606/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/split_points.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/split_points.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/split_points.py
new file mode 100644
index 0000000..910bde3
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/split_points.py
@@ -0,0 +1,210 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import os
+import sys
+import re
+import math
+import collections
+import ast
+
+metric_filename_ext = '.txt'
+# 5 regions for higher order aggregate tables
+other_region_static_count = 5
+
+b_bytes = 1
+k_bytes = 1 << 10  # 1024
+m_bytes = 1 << 20  # 1024^2
+g_bytes = 1 << 30  # 1024^3
+t_bytes = 1 << 40  # 1024^4
+p_bytes = 1 << 50  # 1024^5
+
+def to_number(s):
+  try:
+    return int(re.sub("\D", "", s))
+  except ValueError:
+    return None
+
+def format_Xmx_size_to_bytes(value):
+  strvalue = str(value).lower()
+  if len(strvalue) == 0:
+    return 0
+  modifier = strvalue[-1]
+
+  if modifier == ' ' or modifier in "0123456789":
+    modifier = 'b'
+
+  m = {
+    modifier == 'b': b_bytes,
+    modifier == 'k': k_bytes,
+    modifier == 'm': m_bytes,
+    modifier == 'g': g_bytes,
+    modifier == 't': t_bytes,
+    modifier == 'p': p_bytes
+  }[1]
+  return to_number(strvalue) * m
+
+# Class that takes AMS HBase configs as input and determines the Region
+# pre-splits based on selected services also passed as a parameter to the class.
+class FindSplitPointsForAMSRegions():
+
+  def __init__(self, ams_hbase_site, ams_hbase_env, serviceMetricsDir,
+               operation_mode = 'embedded', services = None):
+    self.ams_hbase_site = ams_hbase_site
+    self.ams_hbase_env = ams_hbase_env
+    self.serviceMetricsDir = serviceMetricsDir
+    self.services = services
+    self.mode = operation_mode
+    # Initialize before user
+    self.initialize()
+
+  def initialize(self):
+    # calculate regions based on available memory
+    self.initialize_region_counts()
+    self.initialize_ordered_set_of_metrics()
+
+  def initialize_region_counts(self):
+    try:
+      xmx_master_bytes = format_Xmx_size_to_bytes(self.ams_hbase_env['hbase_master_heapsize'])
+      xmx_region_bytes = format_Xmx_size_to_bytes(self.ams_hbase_env['hbase_regionserver_heapsize'])
+      xmx_bytes = xmx_master_bytes + xmx_region_bytes
+      if self.mode == 'distributed':
+        xmx_bytes = xmx_region_bytes
+
+      memstore_max_mem = float(self.ams_hbase_site['hbase.regionserver.global.memstore.upperLimit']) * xmx_bytes
+      memstore_flush_size = format_Xmx_size_to_bytes(self.ams_hbase_site['hbase.hregion.memstore.flush.size'])
+
+      max_inmemory_regions = (memstore_max_mem / memstore_flush_size) - other_region_static_count
+      print 'max_inmemory_regions: %s' % max_inmemory_regions
+
+      if max_inmemory_regions > 2:
+        # Lets say total = 12, so we have 7 regions to allocate between
+        # METRIC_RECORD and METRIC_AGGREGATE tables, desired = (5, 2)
+        self.desired_precision_region_count = int(math.floor(0.8 * max_inmemory_regions))
+        self.desired_aggregate_region_count = int(max_inmemory_regions - self.desired_precision_region_count)
+      else:
+        self.desired_precision_region_count = 1
+        self.desired_aggregate_region_count = 1
+
+    except:
+      print('Bad config settings, could not calculate max regions available.')
+    pass
+
+  def initialize_ordered_set_of_metrics(self):
+    onlyServicefiles = [ f for f in os.listdir(self.serviceMetricsDir) if
+                         os.path.isfile(os.path.join(self.serviceMetricsDir, f)) ]
+
+    metrics = set()
+
+    for file in onlyServicefiles:
+      # Process for services selected at deploy time or all services if
+      # services arg is not passed
+      if self.services is None or file.rstrip(metric_filename_ext) in self.services:
+        print 'Processing file: %s' % os.path.join(self.serviceMetricsDir, file)
+        with open(os.path.join(self.serviceMetricsDir, file), 'r') as f:
+          for metric in f:
+            metrics.add(metric.strip())
+          pass
+      pass
+
+    self.metrics = sorted(metrics)
+    print 'metrics length: %s' % len(self.metrics)
+
+  def get_split_points(self):
+    split_points = collections.namedtuple('SplitPoints', [ 'precision', 'aggregate' ])
+    split_points.precision = []
+    split_points.aggregate = []
+
+    metric_list = list(self.metrics)
+    metrics_total = len(metric_list)
+
+    print 'desired_precision_region_count: %s' % self.desired_precision_region_count
+    print 'desired_aggregate_region_count: %s' % self.desired_aggregate_region_count
+
+    if self.desired_precision_region_count > 1:
+      idx = int(math.ceil(metrics_total / self.desired_precision_region_count))
+      index = idx
+      for i in range(0, self.desired_precision_region_count - 1):
+        if index < metrics_total - 1:
+          split_points.precision.append(metric_list[index])
+          index += idx
+
+    if self.desired_aggregate_region_count > 1:
+      idx = int(math.ceil(metrics_total / self.desired_aggregate_region_count))
+      index = idx
+      for i in range(0, self.desired_aggregate_region_count - 1):
+        if index < metrics_total - 1:
+          split_points.aggregate.append(metric_list[index])
+          index += idx
+
+    return split_points
+  pass
+
+def main(argv = None):
+  scriptDir = os.path.realpath(os.path.dirname(argv[0]))
+  serviceMetricsDir = os.path.join(scriptDir, 'service-metrics')
+  if os.path.exists(serviceMetricsDir):
+    onlyargs = argv[1:]
+    if len(onlyargs) < 3:
+      sys.stderr.write("Usage: dict(ams-hbase-site) dict(ams-hbase-env) list(services)\n")
+      sys.exit(2)
+    pass
+
+    ams_hbase_site = None
+    ams_hbase_env = None
+    services = None
+    try:
+      ams_hbase_site = ast.literal_eval(str(onlyargs[0]))
+      ams_hbase_env = ast.literal_eval(str(onlyargs[1]))
+      services = onlyargs[2]
+      if services:
+        services = str(services).split(',')
+      pass
+    except Exception, ex:
+      sys.stderr.write(str(ex))
+      sys.stderr.write("\nUsage: Expected items not found in input. Found "
+                       " ams-hbase-site => {0}, ams-hbase-env => {1},"
+                       " services => {2}".format(ams_hbase_site, ams_hbase_env, services))
+      sys.exit(2)
+
+    print '--------- AMS Regions Split point finder ---------'
+    print 'Services: %s' % services
+
+    mode = 'distributed' if 'hbase.rootdir' in ams_hbase_site and \
+           'hdfs' in ams_hbase_site['hbase.rootdir'] else \
+           'embedded'
+
+    split_point_finder = FindSplitPointsForAMSRegions(
+      ams_hbase_site, ams_hbase_env, serviceMetricsDir, mode, services)
+
+    result = split_point_finder.get_split_points()
+    print 'Split points for precision table : %s' % len(result.precision)
+    print 'precision: %s' % str(result.precision)
+    print 'Split points for aggregate table : %s' % len(result.aggregate)
+    print 'aggregate: %s' % str(result.aggregate)
+
+    return 0
+
+  else:
+    print 'Cannot find service metrics dir in %s' % scriptDir
+
+if __name__ == '__main__':
+  main(sys.argv)
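
For reference, a minimal, hypothetical driver showing how the new FindSplitPointsForAMSRegions class could be exercised directly (Python 2, like the script above). The config values, metrics directory path, and service list below are illustrative assumptions for the sketch and are not part of this commit; only the config keys and the constructor/get_split_points signatures come from split_points.py itself.

from split_points import FindSplitPointsForAMSRegions

# Keys mirror what split_points.py reads; the values here are made up for illustration.
ams_hbase_site = {
  'hbase.regionserver.global.memstore.upperLimit': '0.5',
  'hbase.hregion.memstore.flush.size': '134217728',
  'hbase.rootdir': 'file:///var/lib/ambari-metrics-collector/hbase'
}
ams_hbase_env = {
  'hbase_master_heapsize': '1024m',
  'hbase_regionserver_heapsize': '1024m'
}

# Assumed install location of the service-metrics directory added by this commit.
metrics_dir = '/var/lib/ambari-server/resources/stacks/HDP/2.0.6/services/AMBARI_METRICS/service-metrics'

finder = FindSplitPointsForAMSRegions(ams_hbase_site, ams_hbase_env, metrics_dir,
                                      operation_mode = 'embedded', services = ['YARN'])
points = finder.get_split_points()
print 'precision splits: %s' % points.precision
print 'aggregate splits: %s' % points.aggregate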