http://git-wip-us.apache.org/repos/asf/ambari/blob/075cecbf/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/configuration/yarn-site.xml ---------------------------------------------------------------------- diff --git a/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/configuration/yarn-site.xml b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/configuration/yarn-site.xml new file mode 100755 index 0000000..59ff82b --- /dev/null +++ b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/configuration/yarn-site.xml @@ -0,0 +1,579 @@ +<?xml version="1.0"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<!-- Put site-specific property overrides in this file. 
--> +<configuration xmlns:xi="http://www.w3.org/2001/XInclude" supports_final="true"> + <property> + <name>yarn.resourcemanager.hostname</name> + <value>localhost</value> + <description>The hostname of the RM.</description> + <on-ambari-upgrade add="false"/> + </property> + <property> + <name>yarn.resourcemanager.resource-tracker.address</name> + <value>localhost:8025</value> + <description> The address of ResourceManager. </description> + <on-ambari-upgrade add="false"/> + </property> + <property> + <name>yarn.resourcemanager.scheduler.address</name> + <value>localhost:8030</value> + <description>The address of the scheduler interface.</description> + <on-ambari-upgrade add="false"/> + </property> + <property> + <name>yarn.resourcemanager.address</name> + <value>localhost:8050</value> + <description> + The address of the applications manager interface in the + RM. + </description> + <on-ambari-upgrade add="false"/> + </property> + <property> + <name>yarn.resourcemanager.admin.address</name> + <value>localhost:8141</value> + <description>The address of the RM admin interface.</description> + <on-ambari-upgrade add="false"/> + </property> + <property> + <name>yarn.resourcemanager.scheduler.class</name> + <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value> + <description>The class to use as the resource scheduler.</description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.scheduler.minimum-allocation-mb</name> + <value>512</value> + <description> + The minimum allocation for every container request at the RM, + in MBs. Memory requests lower than this won't take effect, + and the specified value will get allocated at minimum. 
+ </description> + <display-name>Minimum Container Size (Memory)</display-name> + <value-attributes> + <type>int</type> + <minimum>0</minimum> + <maximum>5120</maximum> + <unit>MB</unit> + <increment-step>256</increment-step> + </value-attributes> + <depends-on> + <property> + <type>yarn-site</type> + <name>yarn.nodemanager.resource.memory-mb</name> + </property> + </depends-on> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.scheduler.maximum-allocation-mb</name> + <value>5120</value> + <description> + The maximum allocation for every container request at the RM, + in MBs. Memory requests higher than this won't take effect, + and will get capped to this value. + </description> + <display-name>Maximum Container Size (Memory)</display-name> + <value-attributes> + <type>int</type> + <minimum>0</minimum> + <maximum>5120</maximum> + <unit>MB</unit> + <increment-step>256</increment-step> + </value-attributes> + <depends-on> + <property> + <type>yarn-site</type> + <name>yarn.nodemanager.resource.memory-mb</name> + </property> + </depends-on> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.address</name> + <value>0.0.0.0:45454</value> + <description>The address of the container manager in the NM.</description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.resource.memory-mb</name> + <value>5120</value> + <description>Amount of physical memory, in MB, that can be allocated + for containers.</description> + <display-name>Memory allocated for all YARN containers on a node</display-name> + <value-attributes> + <type>int</type> + <minimum>0</minimum> + <maximum>268435456</maximum> + <unit>MB</unit> + <increment-step>256</increment-step> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.application.classpath</name> + 
<value>/etc/hadoop/conf,/usr/lib/hadoop/*,/usr/lib/hadoop/lib/*,/usr/lib/hadoop-hdfs/*,/usr/lib/hadoop-hdfs/lib/*,/usr/lib/hadoop-yarn/*,/usr/lib/hadoop-yarn/lib/*,/usr/lib/hadoop-mapreduce/*,/usr/lib/hadoop-mapreduce/lib/*</value> + <description>Classpath for typical applications.</description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.vmem-pmem-ratio</name> + <value>2.1</value> + <description>Ratio of virtual memory to physical memory when + setting memory limits for containers. Container allocations are + expressed in terms of physical memory, and virtual memory usage + is allowed to exceed this allocation by this ratio. + </description> + <display-name>Virtual Memory Ratio</display-name> + <value-attributes> + <type>float</type> + <minimum>0.1</minimum> + <maximum>5.0</maximum> + <increment-step>0.1</increment-step> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.container-executor.class</name> + <value>org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor</value> + <description>ContainerExecutor for launching containers</description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.linux-container-executor.group</name> + <value>hadoop</value> + <description>Unix group of the NodeManager</description> + <depends-on> + <property> + <type>cluster-env</type> + <name>user_group</name> + </property> + </depends-on> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.aux-services</name> + <value>mapreduce_shuffle</value> + <description>Auxiliary services of NodeManager. 
A valid service name should only contain a-zA-Z0-9_ and can + not start with numbers</description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name> + <value>org.apache.hadoop.mapred.ShuffleHandler</value> + <description>The auxiliary service class to use </description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.log-dirs</name> + <value>/hadoop/yarn/log</value> + <description> + Where to store container logs. An application's localized log directory + will be found in ${yarn.nodemanager.log-dirs}/application_${appid}. + Individual containers' log directories will be below this, in directories + named container_${contid}. Each container directory will contain the files + stderr, stdout, and syslog generated by that container. + </description> + <value-attributes> + <type>directories</type> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.local-dirs</name> + <value>/hadoop/yarn/local</value> + <description> + List of directories to store localized files in. An + application's localized file directory will be found in: + ${yarn.nodemanager.local-dirs}/usercache/${user}/appcache/application_${appid}. + Individual containers' work directories, called container_${contid}, will + be subdirectories of this. + </description> + <value-attributes> + <type>directories</type> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.container-monitor.interval-ms</name> + <value>3000</value> + <description> + The interval, in milliseconds, for which the node manager + waits between two cycles of monitoring its containers' memory usage. 
+ </description> + <on-ambari-upgrade add="true"/> + </property> + <!-- + <property> + <name>yarn.nodemanager.health-checker.script.path</name> + <value>/etc/hadoop/conf/health_check_nodemanager</value> + <description>The health check script to run.</description> + </property> + --> + <property> + <name>yarn.nodemanager.health-checker.interval-ms</name> + <value>135000</value> + <description>Frequency of running node health script.</description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.health-checker.script.timeout-ms</name> + <value>60000</value> + <description>Script time out period.</description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.log.retain-second</name> + <value>604800</value> + <description> + Time in seconds to retain user logs. Only applicable if + log aggregation is disabled. + </description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.log-aggregation-enable</name> + <value>true</value> + <description>Whether to enable log aggregation. </description> + <display-name>Enable Log Aggregation</display-name> + <value-attributes> + <type>boolean</type> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.remote-app-log-dir</name> + <value>/app-logs</value> + <description>Location to aggregate logs to. </description> + <property-type>NOT_MANAGED_HDFS_PATH</property-type> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.remote-app-log-dir-suffix</name> + <value>logs</value> + <description> + The remote log dir will be created at + {yarn.nodemanager.remote-app-log-dir}/${user}/{thisParam}. + </description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.log-aggregation.compression-type</name> + <value>gz</value> + <description> + T-file compression types used to compress aggregated logs. 
+ </description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.delete.debug-delay-sec</name> + <value>0</value> + <description> + Number of seconds after an application finishes before the nodemanager's + DeletionService will delete the application's localized file directory + and log directory. + + To diagnose Yarn application problems, set this property's value large + enough (for example, to 600 = 10 minutes) to permit examination of these + directories. After changing the property's value, you must restart the + nodemanager in order for it to have an effect. + + The roots of Yarn applications' work directories are configurable with + the yarn.nodemanager.local-dirs property (see below), and the roots + of the Yarn applications' log directories are configurable with the + yarn.nodemanager.log-dirs property (see also below). + </description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.log-aggregation.retain-seconds</name> + <value>2592000</value> + <description> + How long to keep aggregation logs before deleting them. -1 disables. + Be careful: set this too small and you will spam the name node. + </description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.admin-env</name> + <value>MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX</value> + <description> + Environment variables that should be forwarded from the NodeManager's + environment to the container's. + </description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.nodemanager.disk-health-checker.min-healthy-disks</name> + <value>0.25</value> + <description> + The minimum fraction of number of disks to be healthy for the nodemanager + to launch new containers. This corresponds to both + yarn.nodemanager.local-dirs and yarn.nodemanager.log-dirs. i.e. + If there are fewer healthy local-dirs (or log-dirs) available, + then new containers will not be launched on this node. 
+ </description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.resourcemanager.am.max-attempts</name> + <value>2</value> + <description> + The maximum number of application attempts. It's a global + setting for all application masters. Each application master can specify + its individual maximum number of application attempts via the API, but the + individual number cannot be more than the global upper bound. If it is, + the resourcemanager will override it. The default number is set to 2, to + allow at least one retry for AM. + </description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.resourcemanager.webapp.address</name> + <value>localhost:8088</value> + <description> + The address of the RM web application. + </description> + <on-ambari-upgrade add="false"/> + </property> + <property> + <name>yarn.resourcemanager.webapp.https.address</name> + <value>localhost:8090</value> + <description> + The https address of the RM web application. + </description> + <on-ambari-upgrade add="false"/> + </property> + <property> + <name>yarn.nodemanager.vmem-check-enabled</name> + <value>false</value> + <description> + Whether virtual memory limits will be enforced for containers. + </description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.log.server.url</name> + <value>http://localhost:19888/jobhistory/logs</value> + <description> + URI for the HistoryServer's log resource + </description> + <on-ambari-upgrade add="false"/> + </property> + <property> + <name>yarn.resourcemanager.nodes.exclude-path</name> + <value>/etc/hadoop/conf/yarn.exclude</value> + <description> + Names a file that contains a list of hosts that are + not permitted to connect to the resource manager. The full pathname of the + file must be specified. If the value is empty, no hosts are + excluded. 
+ </description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.http.policy</name> + <value>HTTP_ONLY</value> + <description> + This configures the HTTP endpoint for Yarn Daemons. The following values are supported: - HTTP_ONLY : Service is provided only on http - HTTPS_ONLY : Service is provided only on https + </description> + <on-ambari-upgrade add="true"/> + </property> +==================== + <property> + <name>yarn.timeline-service.enabled</name> + <value>true</value> + <description>Indicate to clients whether timeline service is enabled or not. + If enabled, clients will put entities and events to the timeline server. + </description> + <value-attributes> + <type>boolean</type> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.timeline-service.generic-application-history.store-class</name> + <value>org.apache.hadoop.yarn.server.applicationhistoryservice.NullApplicationHistoryStore</value> + <description> + Store class name for history store, defaulting to file system store + </description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.timeline-service.leveldb-timeline-store.path</name> + <value>/var/log/hadoop-yarn/timeline</value> + <description> + Store file name for leveldb timeline store + </description> + <value-attributes> + <type>directory</type> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.timeline-service.webapp.address</name> + <value>localhost:8188</value> + <description> + The http address of the timeline service web application. + </description> + <on-ambari-upgrade add="false"/> + </property> + <property> + <name>yarn.timeline-service.webapp.https.address</name> + <value>localhost:8190</value> + <description> + The https address of the timeline service web application. 
+ </description> + <on-ambari-upgrade add="false"/> + </property> + <property> + <name>yarn.timeline-service.address</name> + <value>localhost:10200</value> + <description> + This is default address for the timeline server to start + the RPC server. + </description> + <on-ambari-upgrade add="false"/> + </property> + <property> + <description>Enable age off of timeline store data.</description> + <name>yarn.timeline-service.ttl-enable</name> + <value>true</value> + <value-attributes> + <type>boolean</type> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> + <property> + <description>Time to live for timeline store data in milliseconds.</description> + <name>yarn.timeline-service.ttl-ms</name> + <value>2678400000</value> + <value-attributes> + <type>int</type> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> + <property> + <description>Length of time to wait between deletion cycles of leveldb timeline store in milliseconds.</description> + <name>yarn.timeline-service.leveldb-timeline-store.ttl-interval-ms</name> + <value>300000</value> + <value-attributes> + <type>int</type> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> +============================= + <property> + <name>yarn.timeline-service.recovery.enabled</name> + <description> + Enable timeline server to recover state after starting. If + true, then yarn.timeline-service.state-store-class must be specified. + </description> + <value>true</value> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.acl.enable</name> + <value>false</value> + <description> Are acls enabled. </description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.authorization-provider</name> + <description> Yarn authorization provider class. </description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.admin.acl</name> + <value>yarn</value> + <description> ACL of who can be admin of the YARN cluster. 
</description> + <value-attributes> + <empty-value-valid>true</empty-value-valid> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> + <!--ats v1.5 properties--> + <property> + <name>yarn.timeline-service.store-class</name> + <value>org.apache.hadoop.yarn.server.timeline.LeveldbTimelineStore</value> + <description>Main storage class for YARN timeline server.</description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.timeline-service.entity-group-fs-store.active-dir</name> + <value>/ats/active/</value> + <description>DFS path to store active application’s timeline data</description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.timeline-service.entity-group-fs-store.done-dir</name> + <value>/ats/done/</value> + <description>DFS path to store done application’s timeline data</description> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.timeline-service.entity-group-fs-store.group-id-plugin-classes</name> + <value/> + <description>Plugins that can translate a timeline entity read request into a list of timeline cache ids, separated by commas. </description> + <value-attributes> + <empty-value-valid>true</empty-value-valid> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> + <!-- advanced ats v1.5 properties--> + <property> + <name>yarn.timeline-service.entity-group-fs-store.summary-store</name> + <description>Summary storage for ATS v1.5</description> + <!-- Use rolling leveldb, advanced --> + <value>org.apache.hadoop.yarn.server.timeline.RollingLevelDBTimelineStore</value> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.timeline-service.entity-group-fs-store.scan-interval-seconds</name> + <description> + Scan interval for ATS v1.5 entity group file system storage reader. This + value controls how frequently the reader will scan the HDFS active directory + for application status. 
+ </description> + <!-- Default is 60 seconds, advanced --> + <value>60</value> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.timeline-service.entity-group-fs-store.cleaner-interval-seconds</name> + <description> + Scan interval for ATS v1.5 entity group file system storage cleaner. This + value controls how frequently the cleaner will scan the HDFS done directory + for stale application data. + </description> + <!-- 3600 is default, advanced --> + <value>3600</value> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>yarn.timeline-service.entity-group-fs-store.retain-seconds</name> + <description> + How long the ATS v1.5 entity group file system storage will keep an + application's data in the done directory. + </description> + <!-- 7 days is default, advanced --> + <value>604800</value> + <on-ambari-upgrade add="true"/> + </property> +</configuration>
http://git-wip-us.apache.org/repos/asf/ambari/blob/075cecbf/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/kerberos.json ---------------------------------------------------------------------- diff --git a/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/kerberos.json b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/kerberos.json new file mode 100755 index 0000000..4093431 --- /dev/null +++ b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/kerberos.json @@ -0,0 +1,214 @@ +{ + "services": [ + { + "name": "YARN", + "identities": [ + { + "name": "/spnego" + }, + { + "name": "/smokeuser" + } + ], + "configurations": [ + { + "yarn-site": { + "yarn.timeline-service.enabled": "false", + "yarn.timeline-service.http-authentication.type": "kerberos", + "yarn.acl.enable": "true", + "yarn.timeline-service.http-authentication.signature.secret": "", + "yarn.timeline-service.http-authentication.signature.secret.file": "", + "yarn.timeline-service.http-authentication.signer.secret.provider": "", + "yarn.timeline-service.http-authentication.signer.secret.provider.object": "", + "yarn.timeline-service.http-authentication.token.validity": "", + "yarn.timeline-service.http-authentication.cookie.domain": "", + "yarn.timeline-service.http-authentication.cookie.path": "", + "yarn.timeline-service.http-authentication.proxyusers.*.hosts": "", + "yarn.timeline-service.http-authentication.proxyusers.*.users": "", + "yarn.timeline-service.http-authentication.proxyusers.*.groups": "", + "yarn.timeline-service.http-authentication.kerberos.name.rules": "", + "yarn.resourcemanager.proxyusers.*.groups": "", + "yarn.resourcemanager.proxyusers.*.hosts": "", + "yarn.resourcemanager.proxyusers.*.users": "", + "yarn.resourcemanager.proxy-user-privileges.enabled": "true", + 
"yarn.nodemanager.linux-container-executor.cgroups.mount-path": "" + } + }, + { + "core-site": { + "hadoop.proxyuser.${yarn-env/yarn_user}.groups": "*", + "hadoop.proxyuser.${yarn-env/yarn_user}.hosts": "${clusterHostInfo/rm_host}" + } + } + ], + "components": [ + { + "name": "NODEMANAGER", + "identities": [ + { + "name": "nodemanager_nm", + "principal": { + "value": "nm/_HOST@${realm}", + "type" : "service", + "configuration": "yarn-site/yarn.nodemanager.principal", + "local_username": "${yarn-env/yarn_user}" + }, + "keytab": { + "file": "${keytab_dir}/nm.service.keytab", + "owner": { + "name": "${yarn-env/yarn_user}", + "access": "r" + }, + "group": { + "name": "${cluster-env/user_group}", + "access": "" + }, + "configuration": "yarn-site/yarn.nodemanager.keytab" + } + }, + { + "name": "/spnego", + "principal": { + "configuration": "yarn-site/yarn.nodemanager.webapp.spnego-principal" + }, + "keytab": { + "configuration": "yarn-site/yarn.nodemanager.webapp.spnego-keytab-file" + } + } + ], + "configurations": [ + { + "yarn-site": { + "yarn.nodemanager.container-executor.class": "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor" + } + } + ] + }, + { + "name": "RESOURCEMANAGER", + "identities": [ + { + "name": "resource_manager_rm", + "principal": { + "value": "rm/_HOST@${realm}", + "type" : "service", + "configuration": "yarn-site/yarn.resourcemanager.principal", + "local_username": "${yarn-env/yarn_user}" + }, + "keytab": { + "file": "${keytab_dir}/rm.service.keytab", + "owner": { + "name": "${yarn-env/yarn_user}", + "access": "r" + }, + "group": { + "name": "${cluster-env/user_group}", + "access": "" + }, + "configuration": "yarn-site/yarn.resourcemanager.keytab" + } + }, + { + "name": "/spnego", + "principal": { + "configuration": "yarn-site/yarn.resourcemanager.webapp.spnego-principal" + }, + "keytab": { + "configuration": "yarn-site/yarn.resourcemanager.webapp.spnego-keytab-file" + } + } + ] + }, + { + "name": "APP_TIMELINE_SERVER", + 
"identities": [ + { + "name": "app_timeline_server_yarn", + "principal": { + "value": "yarn/_HOST@${realm}", + "type" : "service", + "configuration": "yarn-site/yarn.timeline-service.principal", + "local_username": "${yarn-env/yarn_user}" + }, + "keytab": { + "file": "${keytab_dir}/yarn.service.keytab", + "owner": { + "name": "${yarn-env/yarn_user}", + "access": "r" + }, + "group": { + "name": "${cluster-env/user_group}", + "access": "" + }, + "configuration": "yarn-site/yarn.timeline-service.keytab" + } + }, + { + "name": "/spnego", + "principal": { + "configuration": "yarn-site/yarn.timeline-service.http-authentication.kerberos.principal" + }, + "keytab": { + "configuration": "yarn-site/yarn.timeline-service.http-authentication.kerberos.keytab" + } + }, + { + "name": "/HDFS/NAMENODE/hdfs" + } + ] + } + ] + }, + { + "name": "MAPREDUCE2", + "identities": [ + { + "name": "/spnego" + }, + { + "name": "/smokeuser" + } + ], + "components": [ + { + "name": "HISTORYSERVER", + "identities": [ + { + "name": "/HDFS/NAMENODE/hdfs" + }, + { + "name": "history_server_jhs", + "principal": { + "value": "jhs/_HOST@${realm}", + "type" : "service", + "configuration": "mapred-site/mapreduce.jobhistory.principal", + "local_username": "${mapred-env/mapred_user}" + }, + "keytab": { + "file": "${keytab_dir}/jhs.service.keytab", + "owner": { + "name": "${mapred-env/mapred_user}", + "access": "r" + }, + "group": { + "name": "${cluster-env/user_group}", + "access": "" + }, + "configuration": "mapred-site/mapreduce.jobhistory.keytab" + } + }, + { + "name": "/spnego", + "principal": { + "configuration": "mapred-site/mapreduce.jobhistory.webapp.spnego-principal" + }, + "keytab": { + "configuration": "mapred-site/mapreduce.jobhistory.webapp.spnego-keytab-file" + } + } + ] + } + ] + } + ] +} http://git-wip-us.apache.org/repos/asf/ambari/blob/075cecbf/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/metainfo.xml 
---------------------------------------------------------------------- diff --git a/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/metainfo.xml b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/metainfo.xml new file mode 100755 index 0000000..b374b80 --- /dev/null +++ b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/metainfo.xml @@ -0,0 +1,310 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> + +<metainfo> + <schemaVersion>2.0</schemaVersion> + <services> + <!-- YARN service: components, OS packages, service check script and configuration dependencies. --> + <service> + <name>YARN</name> + <displayName>YARN</displayName> + <comment>Apache Hadoop NextGen MapReduce (YARN)</comment> + <version>2.7.1+odpi</version> + <components> + + <!-- Optional master (cardinality 0-1) driven by scripts/application_timeline_server.py. --> + <component> + <name>APP_TIMELINE_SERVER</name> + <displayName>App Timeline Server</displayName> + <category>MASTER</category> + <cardinality>0-1</cardinality> + <versionAdvertised>false</versionAdvertised> + <reassignAllowed>true</reassignAllowed> + <commandScript> + <script>scripts/application_timeline_server.py</script> + <scriptType>PYTHON</scriptType> + <timeout>1200</timeout> + </commandScript> + </component> + + <!-- Exactly one ResourceManager master; it also exposes the DECOMMISSION and REFRESHQUEUES custom commands, both implemented in scripts/resourcemanager.py. --> + <component> + <name>RESOURCEMANAGER</name> + <displayName>ResourceManager</displayName> + <category>MASTER</category> + <cardinality>1</cardinality> + <versionAdvertised>false</versionAdvertised> + <reassignAllowed>true</reassignAllowed> + <commandScript> + <script>scripts/resourcemanager.py</script> + <scriptType>PYTHON</scriptType> + <timeout>1200</timeout> + </commandScript> + <logs> + <log> + <logId>yarn_resourcemanager</logId> + <primary>true</primary> + </log> + <log> + <logId>yarn_historyserver</logId> + </log> + <log> + <logId>yarn_jobsummary</logId> + </log> + </logs> + <customCommands> + <customCommand> + <name>DECOMMISSION</name> + <commandScript> + <script>scripts/resourcemanager.py</script> + <scriptType>PYTHON</scriptType> + <timeout>600</timeout> + </commandScript> + </customCommand> + <customCommand> + <name>REFRESHQUEUES</name> + <commandScript> + <script>scripts/resourcemanager.py</script> + <scriptType>PYTHON</scriptType> + <timeout>600</timeout> + </commandScript> + </customCommand> + </customCommands> + <configuration-dependencies> + <config-type>capacity-scheduler</config-type> + <config-type>hdfs-site</config-type> + </configuration-dependencies> + </component> + + <!-- NodeManager slaves (one or more); decommissioning is allowed and bulk commands are routed through RESOURCEMANAGER. --> + <component> + <name>NODEMANAGER</name> + <displayName>NodeManager</displayName> + <category>SLAVE</category> + 
<cardinality>1+</cardinality> + <versionAdvertised>false</versionAdvertised> + <decommissionAllowed>true</decommissionAllowed> + <commandScript> + <script>scripts/nodemanager.py</script> + <scriptType>PYTHON</scriptType> + <timeout>1200</timeout> + </commandScript> + <bulkCommands> + <displayName>NodeManagers</displayName> + <!-- Used by decommission and recommission --> + <masterComponent>RESOURCEMANAGER</masterComponent> + </bulkCommands> + <logs> + <log> + <logId>yarn_nodemanager</logId> + </log> + </logs> + </component> + + <!-- Client component; each configFile maps a generated file name to the config dictionary it is rendered from. --> + <component> + <name>YARN_CLIENT</name> + <displayName>YARN Client</displayName> + <category>CLIENT</category> + <cardinality>1+</cardinality> + <versionAdvertised>false</versionAdvertised> + <commandScript> + <script>scripts/yarn_client.py</script> + <scriptType>PYTHON</scriptType> + <timeout>1200</timeout> + </commandScript> + <configFiles> + <configFile> + <type>xml</type> + <fileName>yarn-site.xml</fileName> + <dictionaryName>yarn-site</dictionaryName> + </configFile> + <configFile> + <type>xml</type> + <fileName>core-site.xml</fileName> + <dictionaryName>core-site</dictionaryName> + </configFile> + <configFile> + <type>env</type> + <fileName>yarn-env.sh</fileName> + <dictionaryName>yarn-env</dictionaryName> + </configFile> + <configFile> + <type>env</type> + <fileName>log4j.properties</fileName> + <dictionaryName>hdfs-log4j,yarn-log4j</dictionaryName> + </configFile> + <configFile> + <type>xml</type> + <fileName>capacity-scheduler.xml</fileName> + <dictionaryName>capacity-scheduler</dictionaryName> + </configFile> + </configFiles> + </component> + </components> + + <!-- Packages installed for YARN on any OS family. --> + <osSpecifics> + <osSpecific> + <osFamily>any</osFamily> + <packages> + <package> + <name>hadoop-yarn</name> + </package> + <package> + <name>hadoop-hdfs</name> + </package> + <package> + <name>hadoop-mapreduce</name> + </package> + </packages> + </osSpecific> + </osSpecifics> + + <!-- Service-level check script for YARN. --> + <commandScript> + <script>scripts/service_check.py</script> + <scriptType>PYTHON</scriptType> + 
<timeout>300</timeout> + </commandScript> + + <requiredServices> + <service>HDFS</service> + <service>MAPREDUCE2</service> + </requiredServices> + + <configuration-dependencies> + <config-type>yarn-site</config-type> + <config-type>yarn-env</config-type> + <config-type>hdfs-site</config-type> + <config-type>hadoop-env</config-type> + <config-type>core-site</config-type> + <config-type>mapred-site</config-type> + <config-type>yarn-log4j</config-type> + <config-type>ams-ssl-client</config-type> + <config-type>ranger-yarn-plugin-properties</config-type> + <config-type>ranger-yarn-audit</config-type> + <config-type>ranger-yarn-policymgr-ssl</config-type> + <config-type>ranger-yarn-security</config-type> + </configuration-dependencies> + <widgetsFileName>YARN_widgets.json</widgetsFileName> + <metricsFileName>YARN_metrics.json</metricsFileName> + </service> + + <!-- MAPREDUCE2 service: declared in the same metainfo as YARN and listing YARN as a required service. --> + <service> + <name>MAPREDUCE2</name> + <displayName>MapReduce2</displayName> + <comment>Apache Hadoop NextGen MapReduce (YARN)</comment> + <version>2.7.1+odpi</version> + <components> + <!-- History Server master, auto-deployed and co-located with YARN/RESOURCEMANAGER; pulls HDFS_CLIENT onto the same host. --> + <component> + <name>HISTORYSERVER</name> + <displayName>History Server</displayName> + <category>MASTER</category> + <cardinality>1</cardinality> + <versionAdvertised>false</versionAdvertised> + <reassignAllowed>true</reassignAllowed> + <auto-deploy> + <enabled>true</enabled> + <co-locate>YARN/RESOURCEMANAGER</co-locate> + </auto-deploy> + <dependencies> + <dependency> + <name>HDFS/HDFS_CLIENT</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + </dependencies> + <commandScript> + <script>scripts/historyserver.py</script> + <scriptType>PYTHON</scriptType> + <timeout>1200</timeout> + </commandScript> + <logs> + <log> + <logId>mapred_historyserver</logId> + <primary>true</primary> + </log> + </logs> + </component> + + <!-- MapReduce2 client (zero or more hosts). --> + <component> + <name>MAPREDUCE2_CLIENT</name> + <displayName>MapReduce2 Client</displayName> + <category>CLIENT</category> + <cardinality>0+</cardinality> + 
<versionAdvertised>false</versionAdvertised> + <commandScript> + <script>scripts/mapreduce2_client.py</script> + <scriptType>PYTHON</scriptType> + <timeout>1200</timeout> + </commandScript> + <configFiles> + <configFile> + <type>xml</type> + <fileName>mapred-site.xml</fileName> + <dictionaryName>mapred-site</dictionaryName> + </configFile> + <configFile> + <type>xml</type> + <fileName>core-site.xml</fileName> + <dictionaryName>core-site</dictionaryName> + </configFile> + <configFile> + <type>env</type> + <fileName>mapred-env.sh</fileName> + <dictionaryName>mapred-env</dictionaryName> + </configFile> + </configFiles> + </component> + </components> + + <osSpecifics> + <osSpecific> + <osFamily>any</osFamily> + <packages> + <package> + <name>hadoop-mapreduce</name> + </package> + </packages> + </osSpecific> + </osSpecifics> + + <!-- Service-level check script for MAPREDUCE2. --> + <commandScript> + <script>scripts/mapred_service_check.py</script> + <scriptType>PYTHON</scriptType> + <timeout>300</timeout> + </commandScript> + + <requiredServices> + <service>YARN</service> + </requiredServices> + + <!-- MAPREDUCE2 names its own configuration directory instead of relying on a default one. --> + <configuration-dir>configuration-mapred</configuration-dir> + + <configuration-dependencies> + <config-type>hdfs-site</config-type> + <config-type>hadoop-env</config-type> + <config-type>core-site</config-type> + <config-type>mapred-site</config-type> + <config-type>mapred-env</config-type> + <config-type>ssl-client</config-type> + <config-type>ssl-server</config-type> + <config-type>ams-ssl-client</config-type> + </configuration-dependencies> + <restartRequiredAfterRackChange>true</restartRequiredAfterRackChange> + <widgetsFileName>MAPREDUCE2_widgets.json</widgetsFileName> + <metricsFileName>MAPREDUCE2_metrics.json</metricsFileName> + </service> + </services> +</metainfo> http://git-wip-us.apache.org/repos/asf/ambari/blob/075cecbf/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/alerts/alert_nodemanager_health.py 
---------------------------------------------------------------------- diff --git a/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/alerts/alert_nodemanager_health.py b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/alerts/alert_nodemanager_health.py new file mode 100755 index 0000000..d7159e4 --- /dev/null +++ b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/alerts/alert_nodemanager_health.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python + +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import ambari_simplejson as json # simplejson is much faster comparing to Python 2.6 json module and has the same functions set. 
+import socket +import urllib2 +import logging +import traceback +from ambari_commons import OSCheck +from ambari_commons.inet_utils import resolve_address +from resource_management.libraries.functions.curl_krb_request import curl_krb_request +from resource_management.libraries.functions.curl_krb_request import DEFAULT_KERBEROS_KINIT_TIMER_MS +from resource_management.libraries.functions.curl_krb_request import KERBEROS_KINIT_TIMER_PARAMETER +from resource_management.core.environment import Environment + +RESULT_CODE_OK = 'OK' +RESULT_CODE_CRITICAL = 'CRITICAL' +RESULT_CODE_UNKNOWN = 'UNKNOWN' + +NODEMANAGER_HTTP_ADDRESS_KEY = '{{yarn-site/yarn.nodemanager.webapp.address}}' +NODEMANAGER_HTTPS_ADDRESS_KEY = '{{yarn-site/yarn.nodemanager.webapp.https.address}}' +YARN_HTTP_POLICY_KEY = '{{yarn-site/yarn.http.policy}}' + +OK_MESSAGE = 'NodeManager Healthy' +CRITICAL_CONNECTION_MESSAGE = 'Connection failed to {0} ({1})' +CRITICAL_HTTP_STATUS_MESSAGE = 'HTTP {0} returned from {1} ({2}) \n{3}' +CRITICAL_NODEMANAGER_STATUS_MESSAGE = 'NodeManager returned an unexpected status of "{0}"' +CRITICAL_NODEMANAGER_UNKNOWN_JSON_MESSAGE = 'Unable to determine NodeManager health from unexpected JSON response' + +KERBEROS_KEYTAB = '{{yarn-site/yarn.nodemanager.webapp.spnego-keytab-file}}' +KERBEROS_PRINCIPAL = '{{yarn-site/yarn.nodemanager.webapp.spnego-principal}}' +SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}' +SMOKEUSER_KEY = '{{cluster-env/smokeuser}}' +EXECUTABLE_SEARCH_PATHS = '{{kerberos-env/executable_search_paths}}' + +NODEMANAGER_DEFAULT_PORT = 8042 + +CONNECTION_TIMEOUT_KEY = 'connection.timeout' +CONNECTION_TIMEOUT_DEFAULT = 5.0 + +LOGGER_EXCEPTION_MESSAGE = "[Alert] NodeManager Health on {0} fails:" +logger = logging.getLogger('ambari_alerts') + +def get_tokens(): + """ + Returns a tuple of tokens in the format {{site/property}} that will be used + to build the dictionary passed into execute + """ + return 
(NODEMANAGER_HTTP_ADDRESS_KEY,NODEMANAGER_HTTPS_ADDRESS_KEY, EXECUTABLE_SEARCH_PATHS, + YARN_HTTP_POLICY_KEY, SMOKEUSER_KEY, KERBEROS_KEYTAB, KERBEROS_PRINCIPAL, SECURITY_ENABLED_KEY) + + +def execute(configurations={}, parameters={}, host_name=None): + """ + Returns a tuple containing the result code and a pre-formatted result label + + Keyword arguments: + configurations (dictionary): a mapping of configuration key to value + parameters (dictionary): a mapping of script parameter key to value + host_name (string): the name of this host where the alert is running + """ + result_code = RESULT_CODE_UNKNOWN + + if configurations is None: + return (result_code, ['There were no configurations supplied to the script.']) + + if host_name is None: + host_name = socket.getfqdn() + + scheme = 'http' + http_uri = None + https_uri = None + http_policy = 'HTTP_ONLY' + + if SMOKEUSER_KEY in configurations: + smokeuser = configurations[SMOKEUSER_KEY] + + executable_paths = None + if EXECUTABLE_SEARCH_PATHS in configurations: + executable_paths = configurations[EXECUTABLE_SEARCH_PATHS] + + security_enabled = False + if SECURITY_ENABLED_KEY in configurations: + security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE' + + kerberos_keytab = None + if KERBEROS_KEYTAB in configurations: + kerberos_keytab = configurations[KERBEROS_KEYTAB] + + kerberos_principal = None + if KERBEROS_PRINCIPAL in configurations: + kerberos_principal = configurations[KERBEROS_PRINCIPAL] + kerberos_principal = kerberos_principal.replace('_HOST', host_name) + + if NODEMANAGER_HTTP_ADDRESS_KEY in configurations: + http_uri = configurations[NODEMANAGER_HTTP_ADDRESS_KEY] + + if NODEMANAGER_HTTPS_ADDRESS_KEY in configurations: + https_uri = configurations[NODEMANAGER_HTTPS_ADDRESS_KEY] + + if YARN_HTTP_POLICY_KEY in configurations: + http_policy = configurations[YARN_HTTP_POLICY_KEY] + + + # parse script arguments + connection_timeout = CONNECTION_TIMEOUT_DEFAULT + if 
CONNECTION_TIMEOUT_KEY in parameters: + connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY]) + + + # determine the right URI and whether to use SSL + host_port = http_uri + if http_policy == 'HTTPS_ONLY': + scheme = 'https' + + if https_uri is not None: + host_port = https_uri + + label = '' + url_response = None + node_healthy = 'false' + total_time = 0 + + # replace hostname on host fqdn to make it work on all environments + if host_port is not None: + if ":" in host_port: + uri_host, uri_port = host_port.split(':') + host_port = '{0}:{1}'.format(host_name, uri_port) + else: + host_port = host_name + + # some yarn-site structures don't have the web ui address + if host_port is None: + host_port = '{0}:{1}'.format(host_name, NODEMANAGER_DEFAULT_PORT) + + query = "{0}://{1}/ws/v1/node/info".format(scheme, host_port) + + try: + if kerberos_principal is not None and kerberos_keytab is not None and security_enabled: + env = Environment.get_instance() + + # curl requires an integer timeout + curl_connection_timeout = int(connection_timeout) + + kinit_timer_ms = parameters.get(KERBEROS_KINIT_TIMER_PARAMETER, DEFAULT_KERBEROS_KINIT_TIMER_MS) + + url_response, error_msg, time_millis = curl_krb_request(env.tmp_dir, kerberos_keytab, kerberos_principal, + query, "nm_health_alert", executable_paths, False, "NodeManager Health", smokeuser, + connection_timeout=curl_connection_timeout, kinit_timer_ms = kinit_timer_ms) + + json_response = json.loads(url_response) + else: + # execute the query for the JSON that includes templeton status + url_response = urllib2.urlopen(query, timeout=connection_timeout) + json_response = json.loads(url_response.read()) + except urllib2.HTTPError, httpError: + label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query, + str(httpError), traceback.format_exc()) + + return (RESULT_CODE_CRITICAL, [label]) + except: + label = CRITICAL_CONNECTION_MESSAGE.format(query, traceback.format_exc()) + return (RESULT_CODE_CRITICAL, 
[label]) + + # URL response received, parse it + try: + node_healthy = json_response['nodeInfo']['nodeHealthy'] + node_healthy_report = json_response['nodeInfo']['healthReport'] + + # convert boolean to string + node_healthy = str(node_healthy) + except: + return (RESULT_CODE_CRITICAL, [query + "\n" + traceback.format_exc()]) + finally: + if url_response is not None: + try: + url_response.close() + except: + pass + + # proper JSON received, compare against known value + if node_healthy.lower() == 'true': + result_code = RESULT_CODE_OK + label = OK_MESSAGE + elif node_healthy.lower() == 'false': + result_code = RESULT_CODE_CRITICAL + label = node_healthy_report + else: + result_code = RESULT_CODE_CRITICAL + label = CRITICAL_NODEMANAGER_STATUS_MESSAGE.format(node_healthy) + + return (result_code, [label]) http://git-wip-us.apache.org/repos/asf/ambari/blob/075cecbf/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/alerts/alert_nodemanagers_summary.py ---------------------------------------------------------------------- diff --git a/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/alerts/alert_nodemanagers_summary.py b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/alerts/alert_nodemanagers_summary.py new file mode 100755 index 0000000..adf27ec --- /dev/null +++ b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/alerts/alert_nodemanagers_summary.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python + +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import urllib2 +import ambari_simplejson as json # simplejson is much faster comparing to Python 2.6 json module and has the same functions set. +import logging +import traceback + +from ambari_commons.urllib_handlers import RefreshHeaderProcessor +from resource_management.libraries.functions.curl_krb_request import curl_krb_request +from resource_management.libraries.functions.curl_krb_request import DEFAULT_KERBEROS_KINIT_TIMER_MS +from resource_management.libraries.functions.curl_krb_request import KERBEROS_KINIT_TIMER_PARAMETER +from resource_management.core.environment import Environment + +ERROR_LABEL = '{0} NodeManager{1} {2} unhealthy.' 
+OK_LABEL = 'All NodeManagers are healthy' + +NODEMANAGER_HTTP_ADDRESS_KEY = '{{yarn-site/yarn.resourcemanager.webapp.address}}' +NODEMANAGER_HTTPS_ADDRESS_KEY = '{{yarn-site/yarn.resourcemanager.webapp.https.address}}' +YARN_HTTP_POLICY_KEY = '{{yarn-site/yarn.http.policy}}' + +KERBEROS_KEYTAB = '{{yarn-site/yarn.nodemanager.webapp.spnego-keytab-file}}' +KERBEROS_PRINCIPAL = '{{yarn-site/yarn.nodemanager.webapp.spnego-principal}}' +SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}' +SMOKEUSER_KEY = '{{cluster-env/smokeuser}}' +EXECUTABLE_SEARCH_PATHS = '{{kerberos-env/executable_search_paths}}' + +CONNECTION_TIMEOUT_KEY = 'connection.timeout' +CONNECTION_TIMEOUT_DEFAULT = 5.0 + +LOGGER_EXCEPTION_MESSAGE = "[Alert] NodeManager Health Summary on {0} fails:" +logger = logging.getLogger('ambari_alerts') + +QRY = "Hadoop:service=ResourceManager,name=RMNMInfo" + +def get_tokens(): + """ + Returns a tuple of tokens in the format {{site/property}} that will be used + to build the dictionary passed into execute + """ + return NODEMANAGER_HTTP_ADDRESS_KEY, NODEMANAGER_HTTPS_ADDRESS_KEY, EXECUTABLE_SEARCH_PATHS, \ + YARN_HTTP_POLICY_KEY, SMOKEUSER_KEY, KERBEROS_KEYTAB, KERBEROS_PRINCIPAL, SECURITY_ENABLED_KEY + + +def execute(configurations={}, parameters={}, host_name=None): + """ + Returns a tuple containing the result code and a pre-formatted result label + + Keyword arguments: + configurations (dictionary): a mapping of configuration key to value + parameters (dictionary): a mapping of script parameter key to value + host_name (string): the name of this host where the alert is running + """ + + if configurations is None: + return (('UNKNOWN', ['There were no configurations supplied to the script.'])) + + scheme = 'http' + http_uri = None + https_uri = None + http_policy = 'HTTP_ONLY' + + security_enabled = False + if SECURITY_ENABLED_KEY in configurations: + security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE' + + executable_paths = None 
+ if EXECUTABLE_SEARCH_PATHS in configurations: + executable_paths = configurations[EXECUTABLE_SEARCH_PATHS] + + kerberos_keytab = None + if KERBEROS_KEYTAB in configurations: + kerberos_keytab = configurations[KERBEROS_KEYTAB] + + kerberos_principal = None + if KERBEROS_PRINCIPAL in configurations: + kerberos_principal = configurations[KERBEROS_PRINCIPAL] + kerberos_principal = kerberos_principal.replace('_HOST', host_name) + + if NODEMANAGER_HTTP_ADDRESS_KEY in configurations: + http_uri = configurations[NODEMANAGER_HTTP_ADDRESS_KEY] + + if NODEMANAGER_HTTPS_ADDRESS_KEY in configurations: + https_uri = configurations[NODEMANAGER_HTTPS_ADDRESS_KEY] + + if YARN_HTTP_POLICY_KEY in configurations: + http_policy = configurations[YARN_HTTP_POLICY_KEY] + + if SMOKEUSER_KEY in configurations: + smokeuser = configurations[SMOKEUSER_KEY] + + # parse script arguments + connection_timeout = CONNECTION_TIMEOUT_DEFAULT + if CONNECTION_TIMEOUT_KEY in parameters: + connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY]) + + kinit_timer_ms = parameters.get(KERBEROS_KINIT_TIMER_PARAMETER, DEFAULT_KERBEROS_KINIT_TIMER_MS) + + # determine the right URI and whether to use SSL + uri = http_uri + if http_policy == 'HTTPS_ONLY': + scheme = 'https' + + if https_uri is not None: + uri = https_uri + + uri = str(host_name) + ":" + uri.split(":")[1] + live_nodemanagers_qry = "{0}://{1}/jmx?qry={2}".format(scheme, uri, QRY) + convert_to_json_failed = False + response_code = None + try: + if kerberos_principal is not None and kerberos_keytab is not None and security_enabled: + env = Environment.get_instance() + + # curl requires an integer timeout + curl_connection_timeout = int(connection_timeout) + + url_response, error_msg, time_millis = curl_krb_request(env.tmp_dir, kerberos_keytab, kerberos_principal, + live_nodemanagers_qry, "nm_health_summary_alert", executable_paths, False, + "NodeManager Health Summary", smokeuser, connection_timeout=curl_connection_timeout, + kinit_timer_ms = 
kinit_timer_ms) + + try: + url_response_json = json.loads(url_response) + live_nodemanagers = json.loads(find_value_in_jmx(url_response_json, "LiveNodeManagers", live_nodemanagers_qry)) + except ValueError, error: + convert_to_json_failed = True + logger.exception("[Alert][{0}] Convert response to json failed or json doesn't contain needed data: {1}". + format("NodeManager Health Summary", str(error))) + + if convert_to_json_failed: + response_code, error_msg, time_millis = curl_krb_request(env.tmp_dir, kerberos_keytab, kerberos_principal, + live_nodemanagers_qry, "nm_health_summary_alert", executable_paths, True, + "NodeManager Health Summary", smokeuser, connection_timeout=curl_connection_timeout, + kinit_timer_ms = kinit_timer_ms) + else: + live_nodemanagers = json.loads(get_value_from_jmx(live_nodemanagers_qry, + "LiveNodeManagers", connection_timeout)) + + if kerberos_principal is not None and kerberos_keytab is not None and security_enabled: + if response_code in [200, 307] and convert_to_json_failed: + return ('UNKNOWN', ['HTTP {0} response (metrics unavailable)'.format(str(response_code))]) + elif convert_to_json_failed and response_code not in [200, 307]: + raise Exception("[Alert][NodeManager Health Summary] Getting data from {0} failed with http code {1}".format( + str(live_nodemanagers_qry), str(response_code))) + + unhealthy_count = 0 + + for nodemanager in live_nodemanagers: + health_report = nodemanager['State'] + if health_report == 'UNHEALTHY': + unhealthy_count += 1 + + if unhealthy_count == 0: + result_code = 'OK' + label = OK_LABEL + else: + result_code = 'CRITICAL' + if unhealthy_count == 1: + label = ERROR_LABEL.format(unhealthy_count, '', 'is') + else: + label = ERROR_LABEL.format(unhealthy_count, 's', 'are') + + except: + label = traceback.format_exc() + result_code = 'UNKNOWN' + + return (result_code, [label]) + + +def get_value_from_jmx(query, jmx_property, connection_timeout): + response = None + + try: + # use a customer header process 
that will look for the non-standard + # "Refresh" header and attempt to follow the redirect + url_opener = urllib2.build_opener(RefreshHeaderProcessor()) + response = url_opener.open(query, timeout=connection_timeout) + + data = response.read() + data_dict = json.loads(data) + return find_value_in_jmx(data_dict, jmx_property, query) + finally: + if response is not None: + try: + response.close() + except: + pass + + +def find_value_in_jmx(data_dict, jmx_property, query): + json_data = data_dict["beans"][0] + + if jmx_property not in json_data: + beans = data_dict['beans'] + for jmx_prop_list_item in beans: + if "name" in jmx_prop_list_item and jmx_prop_list_item["name"] == QRY: + if jmx_property not in jmx_prop_list_item: + raise Exception("Unable to find {0} in JSON from {1} ".format(jmx_property, query)) + json_data = jmx_prop_list_item + + return json_data[jmx_property] \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/075cecbf/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/files/validateYarnComponentStatusWindows.py ---------------------------------------------------------------------- diff --git a/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/files/validateYarnComponentStatusWindows.py b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/files/validateYarnComponentStatusWindows.py new file mode 100755 index 0000000..5e2b4d9 --- /dev/null +++ b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/files/validateYarnComponentStatusWindows.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python + +''' +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. 
The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +''' + +import optparse +import subprocess +import ambari_simplejson as json # simplejson is much faster comparing to Python 2.6 json module and has the same functions set. +import urllib2 + +RESOURCEMANAGER = 'rm' +NODEMANAGER = 'nm' +HISTORYSERVER = 'hs' + +STARTED_STATE = 'STARTED' +RUNNING_STATE = 'RUNNING' + +#Return reponse for given path and address +def getResponse(path, address, ssl_enabled): + if ssl_enabled: + url = 'https://' + address + path + else: + url = 'http://' + address + path + + try: + handle = urllib2.urlopen(url) + output = handle.read() + handle.close() + response = json.loads(output) + if response == None: + print 'There is no response for url: ' + str(url) + exit(1) + return response + except Exception as e: + print 'Error getting response for url:' + str(url), e + exit(1) + +#Verify that REST api is available for given component +def validateAvailability(component, path, address, ssl_enabled): + + try: + response = getResponse(path, address, ssl_enabled) + is_valid = validateAvailabilityResponse(component, response) + if not is_valid: + exit(1) + except Exception as e: + print 'Error checking availability status of component', e + exit(1) + +#Validate component-specific response +def validateAvailabilityResponse(component, response): + try: + if component == RESOURCEMANAGER: + rm_state = response['clusterInfo']['state'] + if rm_state == STARTED_STATE: + return True + else: + print 
'Resourcemanager is not started' + return False + + elif component == NODEMANAGER: + node_healthy = bool(response['nodeInfo']['nodeHealthy']) + if node_healthy: + return True + else: + return False + elif component == HISTORYSERVER: + hs_start_time = response['historyInfo']['startedOn'] + if hs_start_time > 0: + return True + else: + return False + else: + return False + except Exception as e: + print 'Error validation of availability response for ' + str(component), e + return False + +#Verify that component has required resources to work +def validateAbility(component, path, address, ssl_enabled): + + try: + response = getResponse(path, address, ssl_enabled) + is_valid = validateAbilityResponse(component, response) + if not is_valid: + exit(1) + except Exception as e: + print 'Error checking ability of component', e + exit(1) + +#Validate component-specific response that it has required resources to work +def validateAbilityResponse(component, response): + try: + if component == RESOURCEMANAGER: + nodes = [] + if response.has_key('nodes') and not response['nodes'] == None and response['nodes'].has_key('node'): + nodes = response['nodes']['node'] + connected_nodes_count = len(nodes) + if connected_nodes_count == 0: + print 'There is no connected nodemanagers to resourcemanager' + return False + active_nodes = filter(lambda x: x['state'] == RUNNING_STATE, nodes) + active_nodes_count = len(active_nodes) + + if connected_nodes_count == 0: + print 'There is no connected active nodemanagers to resourcemanager' + return False + else: + return True + else: + return False + except Exception as e: + print 'Error validation of ability response', e + return False + +# +# Main. 
+# +def main(): + parser = optparse.OptionParser(usage="usage: %prog [options] component ") + parser.add_option("-p", "--port", dest="address", help="Host:Port for REST API of a desired component") + parser.add_option("-s", "--ssl", dest="ssl_enabled", help="Is SSL enabled for UI of component") + + (options, args) = parser.parse_args() + + component = args[0] + + address = options.address + ssl_enabled = (options.ssl_enabled) in 'true' + if component == RESOURCEMANAGER: + path = '/ws/v1/cluster/info' + elif component == NODEMANAGER: + path = '/ws/v1/node/info' + elif component == HISTORYSERVER: + path = '/ws/v1/history/info' + else: + parser.error("Invalid component") + + validateAvailability(component, path, address, ssl_enabled) + + if component == RESOURCEMANAGER: + path = '/ws/v1/cluster/nodes' + validateAbility(component, path, address, ssl_enabled) + +if __name__ == "__main__": + main() http://git-wip-us.apache.org/repos/asf/ambari/blob/075cecbf/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/scripts/__init__.py ---------------------------------------------------------------------- diff --git a/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/scripts/__init__.py b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/scripts/__init__.py new file mode 100755 index 0000000..35de4bb --- /dev/null +++ b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/scripts/__init__.py @@ -0,0 +1,20 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Ambari Agent + +""" http://git-wip-us.apache.org/repos/asf/ambari/blob/075cecbf/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/scripts/application_timeline_server.py ---------------------------------------------------------------------- diff --git a/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/scripts/application_timeline_server.py b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/scripts/application_timeline_server.py new file mode 100755 index 0000000..4ec6aa7 --- /dev/null +++ b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/scripts/application_timeline_server.py @@ -0,0 +1,155 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +Ambari Agent + +""" + +from resource_management import * +from resource_management.libraries.functions import conf_select +from resource_management.libraries.functions import stack_select +from resource_management.libraries.functions import StackFeature +from resource_management.libraries.functions.stack_features import check_stack_feature +from resource_management.libraries.functions.security_commons import build_expectations, \ + cached_kinit_executor, get_params_from_filesystem, validate_security_config_properties,\ + FILE_TYPE_XML +from resource_management.libraries.functions.format import format +from yarn import yarn +from service import service +from ambari_commons import OSConst +from ambari_commons.os_family_impl import OsFamilyImpl + + +class ApplicationTimelineServer(Script): + def install(self, env): + self.install_packages(env) + + def start(self, env, upgrade_type=None): + import params + env.set_params(params) + self.configure(env) # FOR SECURITY + service('timelineserver', action='start') + + def stop(self, env, upgrade_type=None): + import params + env.set_params(params) + service('timelineserver', action='stop') + + def configure(self, env): + import params + env.set_params(params) + yarn(name='apptimelineserver') + + +@OsFamilyImpl(os_family=OSConst.WINSRV_FAMILY) +class ApplicationTimelineServerWindows(ApplicationTimelineServer): + def status(self, env): + service('timelineserver', action='status') + + +@OsFamilyImpl(os_family=OsFamilyImpl.DEFAULT) +class ApplicationTimelineServerDefault(ApplicationTimelineServer): + def get_component_name(self): + return "hadoop-yarn-timelineserver" + + def pre_upgrade_restart(self, env, upgrade_type=None): + Logger.info("Executing Stack Upgrade pre-restart") + import params + env.set_params(params) + + if params.version and check_stack_feature(StackFeature.ROLLING_UPGRADE, params.version): + conf_select.select(params.stack_name, "hadoop", params.version) + stack_select.select("hadoop-yarn-timelineserver", 
params.version) + + def status(self, env): + import status_params + env.set_params(status_params) + Execute(format("mv {yarn_historyserver_pid_file_old} {yarn_historyserver_pid_file}"), + only_if = format("test -e {yarn_historyserver_pid_file_old}", user=status_params.yarn_user)) + functions.check_process_status(status_params.yarn_historyserver_pid_file) + + def security_status(self, env): + import status_params + env.set_params(status_params) + if status_params.security_enabled: + props_value_check = {"yarn.timeline-service.enabled": "true", + "yarn.timeline-service.http-authentication.type": "kerberos", + "yarn.acl.enable": "true"} + props_empty_check = ["yarn.timeline-service.principal", + "yarn.timeline-service.keytab", + "yarn.timeline-service.http-authentication.kerberos.principal", + "yarn.timeline-service.http-authentication.kerberos.keytab"] + + props_read_check = ["yarn.timeline-service.keytab", + "yarn.timeline-service.http-authentication.kerberos.keytab"] + yarn_site_props = build_expectations('yarn-site', props_value_check, props_empty_check, + props_read_check) + + yarn_expectations ={} + yarn_expectations.update(yarn_site_props) + + security_params = get_params_from_filesystem(status_params.hadoop_conf_dir, + {'yarn-site.xml': FILE_TYPE_XML}) + result_issues = validate_security_config_properties(security_params, yarn_expectations) + if not result_issues: # If all validations passed successfully + try: + # Double check the dict before calling execute + if ( 'yarn-site' not in security_params + or 'yarn.timeline-service.keytab' not in security_params['yarn-site'] + or 'yarn.timeline-service.principal' not in security_params['yarn-site']) \ + or 'yarn.timeline-service.http-authentication.kerberos.keytab' not in security_params['yarn-site'] \ + or 'yarn.timeline-service.http-authentication.kerberos.principal' not in security_params['yarn-site']: + self.put_structured_out({"securityState": "UNSECURED"}) + self.put_structured_out( + {"securityIssuesFound": 
"Keytab file or principal are not set property."}) + return + + cached_kinit_executor(status_params.kinit_path_local, + status_params.yarn_user, + security_params['yarn-site']['yarn.timeline-service.keytab'], + security_params['yarn-site']['yarn.timeline-service.principal'], + status_params.hostname, + status_params.tmp_dir) + cached_kinit_executor(status_params.kinit_path_local, + status_params.yarn_user, + security_params['yarn-site']['yarn.timeline-service.http-authentication.kerberos.keytab'], + security_params['yarn-site']['yarn.timeline-service.http-authentication.kerberos.principal'], + status_params.hostname, + status_params.tmp_dir) + self.put_structured_out({"securityState": "SECURED_KERBEROS"}) + except Exception as e: + self.put_structured_out({"securityState": "ERROR"}) + self.put_structured_out({"securityStateErrorInfo": str(e)}) + else: + issues = [] + for cf in result_issues: + issues.append("Configuration file %s did not pass the validation. Reason: %s" % (cf, result_issues[cf])) + self.put_structured_out({"securityIssuesFound": ". 
".join(issues)}) + self.put_structured_out({"securityState": "UNSECURED"}) + else: + self.put_structured_out({"securityState": "UNSECURED"}) + + def get_log_folder(self): + import params + return params.yarn_log_dir + + def get_user(self): + import params + return params.yarn_user + +if __name__ == "__main__": + ApplicationTimelineServer().execute() http://git-wip-us.apache.org/repos/asf/ambari/blob/075cecbf/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/scripts/historyserver.py ---------------------------------------------------------------------- diff --git a/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/scripts/historyserver.py b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/scripts/historyserver.py new file mode 100755 index 0000000..34c683a --- /dev/null +++ b/contrib/management-packs/odpi-ambari-mpack/src/main/resources/stacks/ODPi/2.0/services/YARN/package/scripts/historyserver.py @@ -0,0 +1,190 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +Ambari Agent + +""" + +from resource_management.libraries.script.script import Script +from resource_management.libraries.resources.hdfs_resource import HdfsResource +from resource_management.libraries.functions import conf_select +from resource_management.libraries.functions import stack_select +from resource_management.libraries.functions import StackFeature +from resource_management.libraries.functions.stack_features import check_stack_feature +from resource_management.libraries.functions.check_process_status import check_process_status +from resource_management.libraries.functions.copy_tarball import copy_to_hdfs +from resource_management.libraries.functions.format import format +from resource_management.libraries.functions.security_commons import build_expectations, \ + cached_kinit_executor, get_params_from_filesystem, validate_security_config_properties, \ + FILE_TYPE_XML +from resource_management.core.source import Template +from resource_management.core.logger import Logger + +from install_jars import install_tez_jars +from yarn import yarn +from service import service +from ambari_commons import OSConst +from ambari_commons.os_family_impl import OsFamilyImpl + + +class HistoryServer(Script): + def install(self, env): + self.install_packages(env) + + def stop(self, env, upgrade_type=None): + import params + env.set_params(params) + service('historyserver', action='stop', serviceName='mapreduce') + + def configure(self, env): + import params + env.set_params(params) + yarn(name="historyserver") + + +@OsFamilyImpl(os_family=OSConst.WINSRV_FAMILY) +class HistoryserverWindows(HistoryServer): + def start(self, env): + import params + env.set_params(params) + self.configure(env) + service('historyserver', action='start', serviceName='mapreduce') + + def status(self, env): + service('historyserver', action='status') + + +@OsFamilyImpl(os_family=OsFamilyImpl.DEFAULT) +class HistoryServerDefault(HistoryServer): + def get_component_name(self): + return 
"hadoop-mapreduce-historyserver" + + def pre_upgrade_restart(self, env, upgrade_type=None): + Logger.info("Executing Stack Upgrade pre-restart") + import params + env.set_params(params) + + if params.version and check_stack_feature(StackFeature.ROLLING_UPGRADE, params.version): + conf_select.select(params.stack_name, "hadoop", params.version) + stack_select.select("hadoop-mapreduce-historyserver", params.version) + # MC Hammer said, "Can't touch this" + copy_to_hdfs("mapreduce", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) + copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) + copy_to_hdfs("slider", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) + params.HdfsResource(None, action="execute") + + def start(self, env, upgrade_type=None): + import params + env.set_params(params) + self.configure(env) # FOR SECURITY + + if params.stack_version_formatted_major and check_stack_feature(StackFeature.COPY_TARBALL_TO_HDFS, params.stack_version_formatted_major): + # MC Hammer said, "Can't touch this" + resource_created = copy_to_hdfs( + "mapreduce", + params.user_group, + params.hdfs_user, + host_sys_prepped=params.host_sys_prepped) + resource_created = copy_to_hdfs( + "tez", + params.user_group, + params.hdfs_user, + host_sys_prepped=params.host_sys_prepped) or resource_created + resource_created = copy_to_hdfs( + "slider", + params.user_group, + params.hdfs_user, + host_sys_prepped=params.host_sys_prepped) or resource_created + if resource_created: + params.HdfsResource(None, action="execute") + else: + # In stack versions before copy_tarball_to_hdfs support tez.tar.gz was copied to a different folder in HDFS. 
+ install_tez_jars() + + service('historyserver', action='start', serviceName='mapreduce') + + def status(self, env): + import status_params + env.set_params(status_params) + check_process_status(status_params.mapred_historyserver_pid_file) + + def security_status(self, env): + import status_params + env.set_params(status_params) + if status_params.security_enabled: + expectations = {} + expectations.update(build_expectations('mapred-site', + None, + [ + 'mapreduce.jobhistory.keytab', + 'mapreduce.jobhistory.principal', + 'mapreduce.jobhistory.webapp.spnego-keytab-file', + 'mapreduce.jobhistory.webapp.spnego-principal' + ], + None)) + + security_params = get_params_from_filesystem(status_params.hadoop_conf_dir, + {'mapred-site.xml': FILE_TYPE_XML}) + result_issues = validate_security_config_properties(security_params, expectations) + if not result_issues: # If all validations passed successfully + try: + # Double check the dict before calling execute + if ( 'mapred-site' not in security_params or + 'mapreduce.jobhistory.keytab' not in security_params['mapred-site'] or + 'mapreduce.jobhistory.principal' not in security_params['mapred-site'] or + 'mapreduce.jobhistory.webapp.spnego-keytab-file' not in security_params['mapred-site'] or + 'mapreduce.jobhistory.webapp.spnego-principal' not in security_params['mapred-site']): + self.put_structured_out({"securityState": "UNSECURED"}) + self.put_structured_out( + {"securityIssuesFound": "Keytab file or principal not set."}) + return + + cached_kinit_executor(status_params.kinit_path_local, + status_params.mapred_user, + security_params['mapred-site']['mapreduce.jobhistory.keytab'], + security_params['mapred-site']['mapreduce.jobhistory.principal'], + status_params.hostname, + status_params.tmp_dir) + cached_kinit_executor(status_params.kinit_path_local, + status_params.mapred_user, + security_params['mapred-site']['mapreduce.jobhistory.webapp.spnego-keytab-file'], + 
security_params['mapred-site']['mapreduce.jobhistory.webapp.spnego-principal'], + status_params.hostname, + status_params.tmp_dir) + self.put_structured_out({"securityState": "SECURED_KERBEROS"}) + except Exception as e: + self.put_structured_out({"securityState": "ERROR"}) + self.put_structured_out({"securityStateErrorInfo": str(e)}) + else: + issues = [] + for cf in result_issues: + issues.append("Configuration file %s did not pass the validation. Reason: %s" % (cf, result_issues[cf])) + self.put_structured_out({"securityIssuesFound": ". ".join(issues)}) + self.put_structured_out({"securityState": "UNSECURED"}) + else: + self.put_structured_out({"securityState": "UNSECURED"}) + + def get_log_folder(self): + import params + return params.mapred_log_dir + + def get_user(self): + import params + return params.mapred_user + +if __name__ == "__main__": + HistoryServer().execute()