Repository: ambari Updated Branches: refs/heads/branch-2.1 3a273df10 -> e1ac20931
AMBARI-11806. 500 errors for wigdet on HDP-2.3 stack installation. (jaimin) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/e1ac2093 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/e1ac2093 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/e1ac2093 Branch: refs/heads/branch-2.1 Commit: e1ac209319fa3c1fcbface46acc553ff46015b73 Parents: 3a273df Author: Jaimin Jetly <jai...@hortonworks.com> Authored: Mon Jun 8 21:14:30 2015 -0700 Committer: Jaimin Jetly <jai...@hortonworks.com> Committed: Mon Jun 8 21:14:30 2015 -0700 ---------------------------------------------------------------------- .../HDP/2.3/services/YARN/YARN_widgets.json | 676 +++++++++++++++++++ .../stacks/HDP/2.3/services/YARN/widgets.json | 676 ------------------- 2 files changed, 676 insertions(+), 676 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/e1ac2093/ambari-server/src/main/resources/stacks/HDP/2.3/services/YARN/YARN_widgets.json ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.3/services/YARN/YARN_widgets.json b/ambari-server/src/main/resources/stacks/HDP/2.3/services/YARN/YARN_widgets.json new file mode 100644 index 0000000..5179470 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/HDP/2.3/services/YARN/YARN_widgets.json @@ -0,0 +1,676 @@ +{ + "layouts": [ + { + "layout_name": "default_yarn_dashboard", + "display_name": "Standard YARN Dashboard", + "section_name": "YARN_SUMMARY", + "widgetLayoutInfo": [ + { + "widget_name": "Memory Utilization", + "description": "Percentage of total memory allocated to containers running in the cluster.", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "yarn.QueueMetrics.Queue=root.AllocatedMB", + "metric_path": "metrics/yarn/Queue/root/AllocatedMB", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + }, + { + "name": "yarn.QueueMetrics.Queue=root.AvailableMB", + "metric_path": "metrics/yarn/Queue/root/AvailableMB", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + } + ], + "values": [ + { + "name": "Memory Utilization", + "value": "${(yarn.QueueMetrics.Queue=root.AllocatedMB / (yarn.QueueMetrics.Queue=root.AllocatedMB + yarn.QueueMetrics.Queue=root.AvailableMB)) * 100}" + } + ], + "properties": { + "display_unit": "%", + "graph_type": "LINE", + "time_range": "1" + } + }, + { + "widget_name": "CPU Utilization", + "description": "Percentage of total virtual cores allocated to containers running in the cluster.", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "yarn.QueueMetrics.Queue=root.AllocatedVCores", + "metric_path": "metrics/yarn/Queue/root/AllocatedVCores", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + }, + { + "name": "yarn.QueueMetrics.Queue=root.AvailableVCores", + "metric_path": "metrics/yarn/Queue/root/AvailableVCores", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + } + ], + "values": [ + { + "name": "Total Allocatable CPU Utilized across NodeManager", + "value": "${(yarn.QueueMetrics.Queue=root.AllocatedVCores / (yarn.QueueMetrics.Queue=root.AllocatedVCores + yarn.QueueMetrics.Queue=root.AvailableVCores)) * 100}" + } + ], + "properties": { + "display_unit": "%", + "graph_type": "LINE", + "time_range": "1" + } + }, + { + "widget_name": "Bad Local Disks", + "description": "Number of unhealthy local disks across all NodeManagers.", + "widget_type": "NUMBER", + "is_visible": true, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.BadLocalDirs", + "metric_path": "metrics/yarn/BadLocalDirs", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.BadLogDirs", + "metric_path": "metrics/yarn/BadLogDirs", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Number of unhealthy local disks for NodeManager", + "value": "${yarn.NodeManagerMetrics.BadLocalDirs + yarn.NodeManagerMetrics.BadLogDirs}" + } + ], + "properties": { + "display_unit": "" + } + }, + { + "widget_name": "Container Failures", + "description": "Percentage of all containers failing in the cluster.", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.ContainersFailed._sum", + "metric_path": "metrics/yarn/ContainersFailed._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersCompleted._sum", + "metric_path": "metrics/yarn/ContainersCompleted._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersLaunched._sum", + "metric_path": "metrics/yarn/ContainersLaunched._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersIniting._sum", + "metric_path": "metrics/yarn/ContainersIniting._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersKilled._sum", + "metric_path": "metrics/yarn/ContainersKilled._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersRunning._sum", + "metric_path": "metrics/yarn/ContainersRunning._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Container Failures", + "value": "${(yarn.NodeManagerMetrics.ContainersFailed._sum/(yarn.NodeManagerMetrics.ContainersFailed._sum + yarn.NodeManagerMetrics.ContainersCompleted._sum + yarn.NodeManagerMetrics.ContainersLaunched._sum + yarn.NodeManagerMetrics.ContainersIniting._sum + yarn.NodeManagerMetrics.ContainersKilled._sum + yarn.NodeManagerMetrics.ContainersRunning._sum)) * 100}" + } + ], + "properties": { + "display_unit": "%", + "graph_type": "LINE", + "time_range": "1" + } + }, + { + "widget_name": "App Failures", + "description": "Percentage of all launched applications failing in the cluster.", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "yarn.QueueMetrics.Queue=root.AppsFailed", + "metric_path": "metrics/yarn/Queue/root/AppsFailed", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + }, + { + "name": "yarn.QueueMetrics.Queue=root.AppsKilled", + "metric_path": "metrics/yarn/Queue/root/AppsKilled", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + }, + { + "name": "yarn.QueueMetrics.Queue=root.AppsPending", + "metric_path": "metrics/yarn/Queue/root/AppsPending", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + }, + { + "name": "yarn.QueueMetrics.Queue=root.AppsRunning", + "metric_path": "metrics/yarn/Queue/root/AppsRunning", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + }, + { + "name": "yarn.QueueMetrics.Queue=root.AppsSubmitted", + "metric_path": "metrics/yarn/Queue/root/AppsSubmitted", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + }, + { + "name": "yarn.QueueMetrics.Queue=root.AppsCompleted", + "metric_path": "metrics/yarn/Queue/root/AppsCompleted", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + } + ], + "values": [ + { + "name": "App Failures", + "value": "${(yarn.QueueMetrics.Queue=root.AppsFailed/(yarn.QueueMetrics.Queue=root.AppsFailed + yarn.QueueMetrics.Queue=root.AppsKilled + yarn.QueueMetrics.Queue=root.AppsPending + yarn.QueueMetrics.Queue=root.AppsRunning + yarn.QueueMetrics.Queue=root.AppsSubmitted + yarn.QueueMetrics.Queue=root.AppsCompleted)) * 100}" + } + ], + "properties": { + "display_unit": "%", + "graph_type": "LINE", + "time_range": "1" + } + }, + { + "widget_name": "Pending Apps", + "description": "Count of applications waiting for cluster resources to become available.", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "yarn.QueueMetrics.Queue=root.AppsPending", + "metric_path": "metrics/yarn/Queue/root/AppsPending", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + } + ], + "values": [ + { + "name": "Pending Apps", + "value": "${yarn.QueueMetrics.Queue=root.AppsPending}" + } + ], + "properties": { + "display_unit": "Apps", + "graph_type": "LINE", + "time_range": "1" + } + }, + { + "widget_name": "Cluster Memory", + "description": "Percentage of memory used across all NodeManager hosts.", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "mem_total._sum", + "metric_path": "metrics/memory/mem_total._avg", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "mem_free._sum", + "metric_path": "metrics/memory/mem_free._avg", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "mem_cached._sum", + "metric_path": "metrics/memory/mem_cached._avg", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Memory utilization", + "value": "${((mem_total._sum - mem_free._sum - mem_cached._sum)/mem_total._sum) * 100}" + } + ], + "properties": { + "display_unit": "%", + "graph_type": "LINE", + "time_range": "1" + } + }, + { + "widget_name": "Cluster Disk", + "description": "Sum of disk throughput for all NodeManager hosts.", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "read_bps._sum", + "metric_path": "metrics/disk/read_bps._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "write_bps._sum", + "metric_path": "metrics/disk/write_bps._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Read throughput", + "value": "${read_bps._sum/1048576}" + }, + { + "name": "Write throughput", + "value": "${write_bps._sum/1048576}" + } + ], + "properties": { + "display_unit": "Mbps", + "graph_type": "LINE", + "time_range": "1" + } + }, + { + "widget_name": "Cluster Network", + "description": "Average of Network utilized across all NodeManager hosts.", + "default_section_name": "YARN_SUMMARY", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "pkts_in._avg", + "metric_path": "metrics/network/pkts_in._avg", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "pkts_out._avg", + "metric_path": "metrics/network/pkts_out._avg", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Packets In", + "value": "${pkts_in._avg}" + }, + { + "name": "Packets Out", + "value": "${pkts_out._avg}" + } + ], + "properties": { + "graph_type": "LINE", + "time_range": "1" + } + }, + { + "widget_name": "Cluster CPU", + "description": "Percentage of CPU utilized across all NodeManager hosts.", + "default_section_name": "YARN_SUMMARY", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "cpu_system._sum", + "metric_path": "metrics/cpu/cpu_system._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "cpu_user._sum", + "metric_path": "metrics/cpu/cpu_user._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "cpu_nice._sum", + "metric_path": "metrics/cpu/cpu_nice._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "cpu_idle._sum", + "metric_path": "metrics/cpu/cpu_idle._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "cpu_wio._sum", + "metric_path": "metrics/cpu/cpu_wio._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "CPU utilization", + "value": "${((cpu_system._sum + cpu_user._sum + cpu_nice._sum)/(cpu_system._sum + cpu_user._sum + cpu_nice._sum + cpu_idle._sum + cpu_wio._sum)) * 100}" + } + ], + "properties": { + "graph_type": "LINE", + "time_range": "1", + "display_unit": "%" + } + } + ] + }, + { + "layout_name": "default_yarn_heatmap", + "display_name": "YARN Heatmaps", + "section_name": "YARN_HEATMAPS", + "widgetLayoutInfo": [ + { + "widget_name": "YARN local disk space utilization per NodeManager", + "description": "", + "widget_type": "HEATMAP", + "is_visible": true, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.GoodLocalDirsDiskUtilizationPerc", + "metric_path": "metrics/yarn/GoodLocalDirsDiskUtilizationPerc", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.GoodLogDirsDiskUtilizationPerc", + "metric_path": "metrics/yarn/GoodLogDirsDiskUtilizationPerc", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "YARN local disk space utilization per NodeManager", + "value": "${(yarn.NodeManagerMetrics.GoodLocalDirsDiskUtilizationPerc + yarn.NodeManagerMetrics.GoodLogDirsDiskUtilizationPerc)/2}" + } + ], + "properties": { + "display_unit": "%", + "max_limit": "100" + } + }, + { + "widget_name": "Total Allocatable RAM Utilized per NodeManager", + "description": "", + "widget_type": "HEATMAP", + "is_visible": false, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.AllocatedGB", + "metric_path": "metrics/yarn/AllocatedGB", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.AvailableGB", + "metric_path": "metrics/yarn/AvailableGB", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Total Allocatable RAM Utilized per NodeManager", + "value": "${(yarn.NodeManagerMetrics.AllocatedGB/(yarn.NodeManagerMetrics.AvailableGB + yarn.NodeManagerMetrics.AllocatedGB)) * 100}" + } + ], + "properties": { + "display_unit": "%", + "max_limit": "100" + } + }, + { + "widget_name": "Total Allocatable CPU Utilized per NodeManager", + "description": "", + "widget_type": "HEATMAP", + "is_visible": false, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.AllocatedVCores", + "metric_path": "metrics/yarn/AllocatedVCores", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.AvailableVCores", + "metric_path": "metrics/yarn/AvailableVCores", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Total Allocatable CPU Utilized per NodeManager", + "value": "${(yarn.NodeManagerMetrics.AllocatedVCores/(yarn.NodeManagerMetrics.AllocatedVCores + yarn.NodeManagerMetrics.AvailableVCores)) * 100}" + } + ], + "properties": { + "display_unit": "%", + "max_limit": "100" + } + }, + { + "widget_name": "Container Failures", + "description": "", + "widget_type": "HEATMAP", + "is_visible": false, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.ContainersFailed", + "metric_path": "metrics/yarn/ContainersFailed", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersCompleted", + "metric_path": "metrics/yarn/ContainersCompleted", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersLaunched", + "metric_path": "metrics/yarn/ContainersLaunched", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersIniting", + "metric_path": "metrics/yarn/ContainersIniting", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersKilled", + "metric_path": "metrics/yarn/ContainersKilled", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersRunning", + "metric_path": "metrics/yarn/ContainersRunning", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Container Failures", + "value": "${(yarn.NodeManagerMetrics.ContainersFailed/(yarn.NodeManagerMetrics.ContainersFailed + yarn.NodeManagerMetrics.ContainersCompleted + yarn.NodeManagerMetrics.ContainersLaunched + yarn.NodeManagerMetrics.ContainersIniting + yarn.NodeManagerMetrics.ContainersKilled + yarn.NodeManagerMetrics.ContainersRunning)) * 100}" + } + ], + "properties": { + "display_unit": "%", + "max_limit": "100" + } + }, + { + "widget_name": "NodeManager GC Time", + "description": "", + "widget_type": "HEATMAP", + "is_visible": false, + "metrics": [ + { + "name": "Hadoop:service=NodeManager,name=JvmMetrics.GcTimeMillis", + "metric_path": "metrics/jvm/gcTimeMillis", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "NodeManager Garbage Collection Time", + "value": "${Hadoop:service=NodeManager,name=JvmMetrics.GcTimeMillis}" + } + ], + "properties": { + "display_unit": "ms", + "max_limit": "10000" + } + }, + { + "widget_name": "NodeManager JVM Heap Memory Used", + "description": "", + "widget_type": "HEATMAP", + "is_visible": false, + "metrics": [ + { + "name": "Hadoop:service=NodeManager,name=JvmMetrics.MemHeapUsedM", + "metric_path": "metrics/jvm/memHeapUsedM", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "NodeManager JVM Heap Memory Used", + "value": "${Hadoop:service=NodeManager,name=JvmMetrics.MemHeapUsedM}" + } + ], + "properties": { + "display_unit": "MB", + "max_limit": "512" + } + }, + { + "widget_name": "Allocated Containers", + "description": "", + "widget_type": "HEATMAP", + "is_visible": false, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.AllocatedContainers", + "metric_path": "metrics/yarn/AllocatedContainers", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Allocated Containers", + "value": "${yarn.NodeManagerMetrics.AllocatedContainers}" + } + ], + "properties": { + "display_unit": "", + "max_limit": "100" + } + }, + { + "widget_name": "NodeManager RAM Utilized", + "description": "", + "widget_type": "HEATMAP", + "is_visible": false, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.AllocatedGB", + "metric_path": "metrics/yarn/AllocatedGB", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "NodeManager RAM Utilized", + "value": "${yarn.NodeManagerMetrics.AllocatedGB}" + } + ], + "properties": { + "display_unit": "", + "max_limit": "100" + } + }, + { + "widget_name": "NodeManager CPU Utilized", + "description": "", + "widget_type": "HEATMAP", + "is_visible": false, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.AllocatedVCores", + "metric_path": "metrics/yarn/AllocatedVCores", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "NodeManager CPU Utilized", + "value": "${yarn.NodeManagerMetrics.AllocatedVCores}" + } + ], + "properties": { + "display_unit": "", + "max_limit": "100" + } + } + ] + } + ] +} http://git-wip-us.apache.org/repos/asf/ambari/blob/e1ac2093/ambari-server/src/main/resources/stacks/HDP/2.3/services/YARN/widgets.json ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.3/services/YARN/widgets.json b/ambari-server/src/main/resources/stacks/HDP/2.3/services/YARN/widgets.json deleted file mode 100644 index 5179470..0000000 --- a/ambari-server/src/main/resources/stacks/HDP/2.3/services/YARN/widgets.json +++ /dev/null @@ -1,676 +0,0 @@ -{ - "layouts": [ - { - "layout_name": "default_yarn_dashboard", - "display_name": "Standard YARN Dashboard", - "section_name": "YARN_SUMMARY", - "widgetLayoutInfo": [ - { - "widget_name": "Memory Utilization", - "description": "Percentage of total memory allocated to containers running in the cluster.", - "widget_type": "GRAPH", - "is_visible": true, - "metrics": [ - { - "name": "yarn.QueueMetrics.Queue=root.AllocatedMB", - "metric_path": "metrics/yarn/Queue/root/AllocatedMB", - "service_name": "YARN", - "component_name": "RESOURCEMANAGER", - "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" - }, - { - "name": "yarn.QueueMetrics.Queue=root.AvailableMB", - "metric_path": "metrics/yarn/Queue/root/AvailableMB", - "service_name": "YARN", - "component_name": "RESOURCEMANAGER", - "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" - } - ], - "values": [ - { - "name": "Memory Utilization", - "value": "${(yarn.QueueMetrics.Queue=root.AllocatedMB / (yarn.QueueMetrics.Queue=root.AllocatedMB + yarn.QueueMetrics.Queue=root.AvailableMB)) * 100}" - } - ], - "properties": { - "display_unit": "%", - "graph_type": "LINE", - "time_range": "1" - } - }, - { - "widget_name": "CPU Utilization", - "description": "Percentage of total virtual cores allocated to containers running in the cluster.", - "widget_type": "GRAPH", - "is_visible": true, - "metrics": [ - { - "name": "yarn.QueueMetrics.Queue=root.AllocatedVCores", - "metric_path": "metrics/yarn/Queue/root/AllocatedVCores", - "service_name": "YARN", - "component_name": "RESOURCEMANAGER", - "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" - }, - { - "name": "yarn.QueueMetrics.Queue=root.AvailableVCores", - "metric_path": "metrics/yarn/Queue/root/AvailableVCores", - "service_name": "YARN", - "component_name": "RESOURCEMANAGER", - "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" - } - ], - "values": [ - { - "name": "Total Allocatable CPU Utilized across NodeManager", - "value": "${(yarn.QueueMetrics.Queue=root.AllocatedVCores / (yarn.QueueMetrics.Queue=root.AllocatedVCores + yarn.QueueMetrics.Queue=root.AvailableVCores)) * 100}" - } - ], - "properties": { - "display_unit": "%", - "graph_type": "LINE", - "time_range": "1" - } - }, - { - "widget_name": "Bad Local Disks", - "description": "Number of unhealthy local disks across all NodeManagers.", - "widget_type": "NUMBER", - "is_visible": true, - "metrics": [ - { - "name": "yarn.NodeManagerMetrics.BadLocalDirs", - "metric_path": "metrics/yarn/BadLocalDirs", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "yarn.NodeManagerMetrics.BadLogDirs", - "metric_path": "metrics/yarn/BadLogDirs", - "service_name": "YARN", - "component_name": "NODEMANAGER" - } - ], - "values": [ - { - "name": "Number of unhealthy local disks for NodeManager", - "value": "${yarn.NodeManagerMetrics.BadLocalDirs + yarn.NodeManagerMetrics.BadLogDirs}" - } - ], - "properties": { - "display_unit": "" - } - }, - { - "widget_name": "Container Failures", - "description": "Percentage of all containers failing in the cluster.", - "widget_type": "GRAPH", - "is_visible": true, - "metrics": [ - { - "name": "yarn.NodeManagerMetrics.ContainersFailed._sum", - "metric_path": "metrics/yarn/ContainersFailed._sum", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "yarn.NodeManagerMetrics.ContainersCompleted._sum", - "metric_path": "metrics/yarn/ContainersCompleted._sum", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "yarn.NodeManagerMetrics.ContainersLaunched._sum", - "metric_path": "metrics/yarn/ContainersLaunched._sum", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "yarn.NodeManagerMetrics.ContainersIniting._sum", - "metric_path": "metrics/yarn/ContainersIniting._sum", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "yarn.NodeManagerMetrics.ContainersKilled._sum", - "metric_path": "metrics/yarn/ContainersKilled._sum", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "yarn.NodeManagerMetrics.ContainersRunning._sum", - "metric_path": "metrics/yarn/ContainersRunning._sum", - "service_name": "YARN", - "component_name": "NODEMANAGER" - } - ], - "values": [ - { - "name": "Container Failures", - "value": "${(yarn.NodeManagerMetrics.ContainersFailed._sum/(yarn.NodeManagerMetrics.ContainersFailed._sum + yarn.NodeManagerMetrics.ContainersCompleted._sum + yarn.NodeManagerMetrics.ContainersLaunched._sum + yarn.NodeManagerMetrics.ContainersIniting._sum + yarn.NodeManagerMetrics.ContainersKilled._sum + yarn.NodeManagerMetrics.ContainersRunning._sum)) * 100}" - } - ], - "properties": { - "display_unit": "%", - "graph_type": "LINE", - "time_range": "1" - } - }, - { - "widget_name": "App Failures", - "description": "Percentage of all launched applications failing in the cluster.", - "widget_type": "GRAPH", - "is_visible": true, - "metrics": [ - { - "name": "yarn.QueueMetrics.Queue=root.AppsFailed", - "metric_path": "metrics/yarn/Queue/root/AppsFailed", - "service_name": "YARN", - "component_name": "RESOURCEMANAGER", - "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" - }, - { - "name": "yarn.QueueMetrics.Queue=root.AppsKilled", - "metric_path": "metrics/yarn/Queue/root/AppsKilled", - "service_name": "YARN", - "component_name": "RESOURCEMANAGER", - "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" - }, - { - "name": "yarn.QueueMetrics.Queue=root.AppsPending", - "metric_path": "metrics/yarn/Queue/root/AppsPending", - "service_name": "YARN", - "component_name": "RESOURCEMANAGER", - "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" - }, - { - "name": "yarn.QueueMetrics.Queue=root.AppsRunning", - "metric_path": "metrics/yarn/Queue/root/AppsRunning", - "service_name": "YARN", - "component_name": "RESOURCEMANAGER", - "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" - }, - { - "name": "yarn.QueueMetrics.Queue=root.AppsSubmitted", - "metric_path": "metrics/yarn/Queue/root/AppsSubmitted", - "service_name": "YARN", - "component_name": "RESOURCEMANAGER", - "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" - }, - { - "name": "yarn.QueueMetrics.Queue=root.AppsCompleted", - "metric_path": "metrics/yarn/Queue/root/AppsCompleted", - "service_name": "YARN", - "component_name": "RESOURCEMANAGER", - "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" - } - ], - "values": [ - { - "name": "App Failures", - "value": "${(yarn.QueueMetrics.Queue=root.AppsFailed/(yarn.QueueMetrics.Queue=root.AppsFailed + yarn.QueueMetrics.Queue=root.AppsKilled + yarn.QueueMetrics.Queue=root.AppsPending + yarn.QueueMetrics.Queue=root.AppsRunning + yarn.QueueMetrics.Queue=root.AppsSubmitted + yarn.QueueMetrics.Queue=root.AppsCompleted)) * 100}" - } - ], - "properties": { - "display_unit": "%", - "graph_type": "LINE", - "time_range": "1" - } - }, - { - "widget_name": "Pending Apps", - "description": "Count of applications waiting for cluster resources to become available.", - "widget_type": "GRAPH", - "is_visible": true, - "metrics": [ - { - "name": "yarn.QueueMetrics.Queue=root.AppsPending", - "metric_path": "metrics/yarn/Queue/root/AppsPending", - "service_name": "YARN", - "component_name": "RESOURCEMANAGER", - "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" - } - ], - "values": [ - { - "name": "Pending Apps", - "value": "${yarn.QueueMetrics.Queue=root.AppsPending}" - } - ], - "properties": { - "display_unit": "Apps", - "graph_type": "LINE", - "time_range": "1" - } - }, - { - "widget_name": "Cluster Memory", - "description": "Percentage of memory used across all NodeManager hosts.", - "widget_type": "GRAPH", - "is_visible": true, - "metrics": [ - { - "name": "mem_total._sum", - "metric_path": "metrics/memory/mem_total._avg", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "mem_free._sum", - "metric_path": "metrics/memory/mem_free._avg", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "mem_cached._sum", - "metric_path": "metrics/memory/mem_cached._avg", - "service_name": "YARN", - "component_name": "NODEMANAGER" - } - ], - "values": [ - { - "name": "Memory utilization", - "value": "${((mem_total._sum - mem_free._sum - mem_cached._sum)/mem_total._sum) * 100}" - } - ], - "properties": { - "display_unit": "%", - "graph_type": "LINE", - "time_range": "1" - } - }, - { - "widget_name": "Cluster Disk", - "description": "Sum of disk throughput for all NodeManager hosts.", - "widget_type": "GRAPH", - "is_visible": true, - "metrics": [ - { - "name": "read_bps._sum", - "metric_path": "metrics/disk/read_bps._sum", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "write_bps._sum", - "metric_path": "metrics/disk/write_bps._sum", - "service_name": "YARN", - "component_name": "NODEMANAGER" - } - ], - "values": [ - { - "name": "Read throughput", - "value": "${read_bps._sum/1048576}" - }, - { - "name": "Write throughput", - "value": "${write_bps._sum/1048576}" - } - ], - "properties": { - "display_unit": "Mbps", - "graph_type": "LINE", - "time_range": "1" - } - }, - { - "widget_name": "Cluster Network", - "description": "Average of Network utilized across all NodeManager hosts.", - "default_section_name": "YARN_SUMMARY", - "widget_type": "GRAPH", - "is_visible": true, - "metrics": [ - { - "name": "pkts_in._avg", - "metric_path": "metrics/network/pkts_in._avg", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "pkts_out._avg", - "metric_path": "metrics/network/pkts_out._avg", - "service_name": "YARN", - "component_name": "NODEMANAGER" - } - ], - "values": [ - { - "name": "Packets In", - "value": "${pkts_in._avg}" - }, - { - "name": "Packets Out", - "value": "${pkts_out._avg}" - } - ], - "properties": { - "graph_type": "LINE", - "time_range": "1" - } - }, - { - "widget_name": "Cluster CPU", - "description": "Percentage of CPU utilized across all NodeManager hosts.", - "default_section_name": "YARN_SUMMARY", - "widget_type": "GRAPH", - "is_visible": true, - "metrics": [ - { - "name": "cpu_system._sum", - "metric_path": "metrics/cpu/cpu_system._sum", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "cpu_user._sum", - "metric_path": "metrics/cpu/cpu_user._sum", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "cpu_nice._sum", - "metric_path": "metrics/cpu/cpu_nice._sum", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "cpu_idle._sum", - "metric_path": "metrics/cpu/cpu_idle._sum", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "cpu_wio._sum", - "metric_path": "metrics/cpu/cpu_wio._sum", - "service_name": "YARN", - "component_name": "NODEMANAGER" - } - ], - "values": [ - { - "name": "CPU utilization", - "value": "${((cpu_system._sum + cpu_user._sum + cpu_nice._sum)/(cpu_system._sum + cpu_user._sum + cpu_nice._sum + cpu_idle._sum + cpu_wio._sum)) * 100}" - } - ], - "properties": { - "graph_type": "LINE", - "time_range": "1", - "display_unit": "%" - } - } - ] - }, - { - "layout_name": "default_yarn_heatmap", - "display_name": "YARN Heatmaps", - "section_name": "YARN_HEATMAPS", - "widgetLayoutInfo": [ - { - "widget_name": "YARN local disk space utilization per NodeManager", - "description": "", - "widget_type": "HEATMAP", - "is_visible": true, - "metrics": [ - { - "name": "yarn.NodeManagerMetrics.GoodLocalDirsDiskUtilizationPerc", - "metric_path": "metrics/yarn/GoodLocalDirsDiskUtilizationPerc", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "yarn.NodeManagerMetrics.GoodLogDirsDiskUtilizationPerc", - "metric_path": "metrics/yarn/GoodLogDirsDiskUtilizationPerc", - "service_name": "YARN", - "component_name": "NODEMANAGER" - } - ], - "values": [ - { - "name": "YARN local disk space utilization per NodeManager", - "value": "${(yarn.NodeManagerMetrics.GoodLocalDirsDiskUtilizationPerc + yarn.NodeManagerMetrics.GoodLogDirsDiskUtilizationPerc)/2}" - } - ], - "properties": { - "display_unit": "%", - "max_limit": "100" - } - }, - { - "widget_name": "Total Allocatable RAM Utilized per NodeManager", - "description": "", - "widget_type": "HEATMAP", - "is_visible": false, - "metrics": [ - { - "name": "yarn.NodeManagerMetrics.AllocatedGB", - "metric_path": "metrics/yarn/AllocatedGB", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "yarn.NodeManagerMetrics.AvailableGB", - "metric_path": "metrics/yarn/AvailableGB", - "service_name": "YARN", - "component_name": "NODEMANAGER" - } - ], - "values": [ - { - "name": "Total Allocatable RAM Utilized per NodeManager", - "value": "${(yarn.NodeManagerMetrics.AllocatedGB/(yarn.NodeManagerMetrics.AvailableGB + yarn.NodeManagerMetrics.AllocatedGB)) * 100}" - } - ], - "properties": { - "display_unit": "%", - "max_limit": "100" - } - }, - { - "widget_name": "Total Allocatable CPU Utilized per NodeManager", - "description": "", - "widget_type": "HEATMAP", - "is_visible": false, - "metrics": [ - { - "name": "yarn.NodeManagerMetrics.AllocatedVCores", - "metric_path": "metrics/yarn/AllocatedVCores", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "yarn.NodeManagerMetrics.AvailableVCores", - "metric_path": "metrics/yarn/AvailableVCores", - "service_name": "YARN", - "component_name": "NODEMANAGER" - } - ], - "values": [ - { - "name": "Total Allocatable CPU Utilized per NodeManager", - "value": "${(yarn.NodeManagerMetrics.AllocatedVCores/(yarn.NodeManagerMetrics.AllocatedVCores + yarn.NodeManagerMetrics.AvailableVCores)) * 100}" - } - ], - "properties": { - "display_unit": "%", - "max_limit": "100" - } - }, - { - "widget_name": "Container Failures", - "description": "", - "widget_type": "HEATMAP", - "is_visible": false, - "metrics": [ - { - "name": "yarn.NodeManagerMetrics.ContainersFailed", - "metric_path": "metrics/yarn/ContainersFailed", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "yarn.NodeManagerMetrics.ContainersCompleted", - "metric_path": "metrics/yarn/ContainersCompleted", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "yarn.NodeManagerMetrics.ContainersLaunched", - "metric_path": "metrics/yarn/ContainersLaunched", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "yarn.NodeManagerMetrics.ContainersIniting", - "metric_path": "metrics/yarn/ContainersIniting", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "yarn.NodeManagerMetrics.ContainersKilled", - "metric_path": "metrics/yarn/ContainersKilled", - "service_name": "YARN", - "component_name": "NODEMANAGER" - }, - { - "name": "yarn.NodeManagerMetrics.ContainersRunning", - "metric_path": "metrics/yarn/ContainersRunning", - "service_name": "YARN", - "component_name": "NODEMANAGER" - } - ], - "values": [ - { - "name": "Container Failures", - "value": "${(yarn.NodeManagerMetrics.ContainersFailed/(yarn.NodeManagerMetrics.ContainersFailed + yarn.NodeManagerMetrics.ContainersCompleted + yarn.NodeManagerMetrics.ContainersLaunched + yarn.NodeManagerMetrics.ContainersIniting + yarn.NodeManagerMetrics.ContainersKilled + yarn.NodeManagerMetrics.ContainersRunning)) * 100}" - } - ], - "properties": { - "display_unit": "%", - "max_limit": "100" - } - }, - { - "widget_name": "NodeManager GC Time", - "description": "", - "widget_type": "HEATMAP", - "is_visible": false, - "metrics": [ - { - "name": "Hadoop:service=NodeManager,name=JvmMetrics.GcTimeMillis", - "metric_path": "metrics/jvm/gcTimeMillis", - "service_name": "YARN", - "component_name": "NODEMANAGER" - } - ], - "values": [ - { - "name": "NodeManager Garbage Collection Time", - "value": "${Hadoop:service=NodeManager,name=JvmMetrics.GcTimeMillis}" - } - ], - "properties": { - "display_unit": "ms", - "max_limit": "10000" - } - }, - { - "widget_name": "NodeManager JVM Heap Memory Used", - "description": "", - "widget_type": "HEATMAP", - "is_visible": false, - "metrics": [ - { - "name": "Hadoop:service=NodeManager,name=JvmMetrics.MemHeapUsedM", - "metric_path": "metrics/jvm/memHeapUsedM", - "service_name": "YARN", - "component_name": "NODEMANAGER" - } - ], - "values": [ - { - "name": "NodeManager JVM Heap Memory Used", - "value": "${Hadoop:service=NodeManager,name=JvmMetrics.MemHeapUsedM}" - } - ], - "properties": { - "display_unit": "MB", - "max_limit": "512" - } - }, - { - "widget_name": "Allocated Containers", - "description": "", - "widget_type": "HEATMAP", - "is_visible": false, - "metrics": [ - { - "name": "yarn.NodeManagerMetrics.AllocatedContainers", - "metric_path": "metrics/yarn/AllocatedContainers", - "service_name": "YARN", - "component_name": "NODEMANAGER" - } - ], - "values": [ - { - "name": "Allocated Containers", - "value": "${yarn.NodeManagerMetrics.AllocatedContainers}" - } - ], - "properties": { - "display_unit": "", - "max_limit": "100" - } - }, - { - "widget_name": "NodeManager RAM Utilized", - "description": "", - "widget_type": "HEATMAP", - "is_visible": false, - "metrics": [ - { - "name": "yarn.NodeManagerMetrics.AllocatedGB", - "metric_path": "metrics/yarn/AllocatedGB", - "service_name": "YARN", - "component_name": "NODEMANAGER" - } - ], - "values": [ - { - "name": "NodeManager RAM Utilized", - "value": "${yarn.NodeManagerMetrics.AllocatedGB}" - } - ], - "properties": { - "display_unit": "", - "max_limit": "100" - } - }, - { - "widget_name": "NodeManager CPU Utilized", - "description": "", - "widget_type": "HEATMAP", - "is_visible": false, - "metrics": [ - { - "name": "yarn.NodeManagerMetrics.AllocatedVCores", - "metric_path": "metrics/yarn/AllocatedVCores", - "service_name": "YARN", - "component_name": "NODEMANAGER" - } - ], - "values": [ - { - "name": "NodeManager CPU Utilized", - "value": "${yarn.NodeManagerMetrics.AllocatedVCores}" - } - ], - "properties": { - "display_unit": "", - "max_limit": "100" - } - } - ] - } - ] -}